/*        $NetBSD: trap.c,v 1.121 2019/07/13 17:03:01 mlelstv Exp $        */
      
      /*
       * Copyright (c) 1998, 2000, 2017 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Charles M. Hannum, and by Maxime Villard.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*
       * Copyright (c) 1990 The Regents of the University of California.
       * All rights reserved.
       *
       * This code is derived from software contributed to Berkeley by
       * the University of Utah, and William Jolitz.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)trap.c        7.4 (Berkeley) 5/13/91
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.121 2019/07/13 17:03:01 mlelstv Exp $");
      
      #include "opt_ddb.h"
      #include "opt_kgdb.h"
      #include "opt_xen.h"
      #include "opt_dtrace.h"
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/proc.h>
      #include <sys/acct.h>
      #include <sys/kauth.h>
      #include <sys/kernel.h>
      #include <sys/kmem.h>
      #include <sys/ras.h>
      #include <sys/signal.h>
      #include <sys/syscall.h>
      #include <sys/cpu.h>
      #include <sys/ucontext.h>
      
      #include <uvm/uvm_extern.h>
      
      #ifdef COMPAT_NETBSD32
      #include <sys/exec.h>
      #include <compat/netbsd32/netbsd32_exec.h>
      #endif
      
      #include <machine/cpufunc.h>
      #include <x86/fpu.h>
      #include <x86/dbregs.h>
      #include <machine/psl.h>
      #include <machine/reg.h>
      #include <machine/trap.h>
      #include <machine/userret.h>
      #include <machine/db_machdep.h>
      
      #include <x86/nmi.h>
      
      #ifndef XENPV
      #include "isa.h"
      #endif
      
      #include <sys/kgdb.h>
      
      #ifdef KDTRACE_HOOKS
      #include <sys/dtrace_bsd.h>
      
      /*
       * This is a hook which is initialized by the dtrace module
       * to handle traps which might occur during DTrace probe
       * execution.
       */
      dtrace_trap_func_t        dtrace_trap_func = NULL;
      
      dtrace_doubletrap_func_t        dtrace_doubletrap_func = NULL;
      #endif
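
       /*
        * Illustrative sketch, not part of the original source: a DTrace provider
        * module is expected to install these hooks when it attaches and clear
        * them again on detach, roughly as below.  The handler name and the exact
        * prototype behind dtrace_trap_func_t are assumptions here; see the call
        * site in trap() for how the hook is invoked.
        *
        *      dtrace_trap_func = mymodule_trap;       hypothetical handler
        *      ...
        *      dtrace_trap_func = NULL;                on detach
        *
        * The handler is passed the trap frame and the trap type, and returns
        * non-zero if it recovered from the fault so trap() can simply return.
        */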
      
      void nmitrap(struct trapframe *);
      void doubletrap(struct trapframe *);
      void trap(struct trapframe *);
      void trap_return_fault_return(struct trapframe *) __dead;
      
      const char * const trap_type[] = {
              "privileged instruction fault",                /*  0 T_PRIVINFLT */
              "breakpoint trap",                        /*  1 T_BPTFLT */
              "arithmetic trap",                        /*  2 T_ARITHTRAP */
              "asynchronous system trap",                /*  3 T_ASTFLT */
              "protection fault",                        /*  4 T_PROTFLT */
              "trace trap",                                /*  5 T_TRCTRAP */
              "page fault",                                /*  6 T_PAGEFLT */
              "alignment fault",                        /*  7 T_ALIGNFLT */
              "integer divide fault",                        /*  8 T_DIVIDE */
              "non-maskable interrupt",                /*  9 T_NMI */
              "overflow trap",                        /* 10 T_OFLOW */
              "bounds check fault",                        /* 11 T_BOUND */
              "FPU not available fault",                /* 12 T_DNA */
              "double fault",                                /* 13 T_DOUBLEFLT */
              "FPU operand fetch fault",                /* 14 T_FPOPFLT */
              "invalid TSS fault",                        /* 15 T_TSSFLT */
              "segment not present fault",                /* 16 T_SEGNPFLT */
              "stack fault",                                /* 17 T_STKFLT */
              "machine check fault",                        /* 18 T_MCA */
              "SSE FP exception",                        /* 19 T_XMM */
              "reserved trap",                        /* 20 T_RESERVED */
      };
      int        trap_types = __arraycount(trap_type);
      
      #define        IDTVEC(name)        __CONCAT(X, name)
      
      #ifdef TRAP_SIGDEBUG
      static void sigdebug(const struct trapframe *, const ksiginfo_t *, int);
      #define SIGDEBUG(a, b, c) sigdebug(a, b, c)
      #else
      #define SIGDEBUG(a, b, c)
      #endif
      
      static void
      onfault_restore(struct trapframe *frame, void *onfault, int error)
      {
               frame->tf_rip = (uintptr_t)onfault;
              frame->tf_rax = error;
      }
      
      static void *
      onfault_handler(const struct pcb *pcb, const struct trapframe *tf)
      {
              struct onfault_table {
                      uintptr_t start;
                      uintptr_t end;
                      void *handler;
              };
              extern const struct onfault_table onfault_table[];
              const struct onfault_table *p;
              uintptr_t pc;
      
               if (pcb->pcb_onfault != NULL) {
                       return pcb->pcb_onfault;
               }

               pc = tf->tf_rip;
               for (p = onfault_table; p->start; p++) {
                       if (p->start <= pc && pc < p->end) {
                               return p->handler;
                       }
               }
               return NULL;
      }
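
       /*
        * Illustrative sketch, not part of the original source: the onfault
        * protocol used by the copy functions.  Before touching user memory, a
        * routine such as copyin() records a recovery address in pcb_onfault;
        * if the access faults, trap() locates that address (or a match in
        * onfault_table) via onfault_handler() and uses onfault_restore() to
        * resume execution there with the error code in %rax:
        *
        *      pcb->pcb_onfault = handler;     arm before the user access
        *      ... touch user memory, may fault ...
        *      pcb->pcb_onfault = NULL;        disarm on success
        *
        * The real copy routines are written in assembly; the lines above are
        * only a C-flavoured outline of the idea.
        */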
      
      static void
      trap_print(const struct trapframe *frame, const lwp_t *l)
      {
              const int type = frame->tf_trapno;
      
              if (frame->tf_trapno < trap_types) {
                      printf("fatal %s", trap_type[type]);
              } else {
                      printf("unknown trap %d", type);
              }
              printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
      
              printf("trap type %d code %#lx rip %#lx cs %#lx rflags %#lx cr2 %#lx "
                  "ilevel %#x rsp %#lx\n",
                  type, frame->tf_err, (u_long)frame->tf_rip, frame->tf_cs,
                  frame->tf_rflags, rcr2(), curcpu()->ci_ilevel, frame->tf_rsp);
      
              printf("curlwp %p pid %d.%d lowest kstack %p\n",
                  l, l->l_proc->p_pid, l->l_lid, KSTACK_LOWEST_ADDR(l));
      }
      
      void
      nmitrap(struct trapframe *frame)
      {
              const int type = T_NMI;
      
              if (nmi_dispatch(frame))
                      return;
              /* NMI can be hooked up to a pushbutton for debugging */
              if (kgdb_trap(type, frame))
                      return;
              if (kdb_trap(type, 0, frame))
                      return;
              /* machine/parity/power fail/"kitchen sink" faults */
      
              x86_nmi();
      }
      
      void
      doubletrap(struct trapframe *frame)
      {
              const int type = T_DOUBLEFLT;
              struct lwp *l = curlwp;
      
              trap_print(frame, l);
      
              if (kdb_trap(type, 0, frame))
                      return;
              if (kgdb_trap(type, frame))
                      return;
      
              panic("double fault");
      }
      
      /*
       * trap(frame): exception, fault, and trap interface to BSD kernel.
       *
       * This common code is called from assembly language IDT gate entry routines
       * that prepare a suitable stack frame, and restore this frame after the
       * exception has been processed. Note that the effect is as if the arguments
       * were passed call by reference.
       *
        * Note that the fpu traps (vectors 0x07 T_DNA, 0x10 T_ARITHTRAP and
        * 0x13 T_XMM) jump directly into the code in x86/fpu.c, so they are
        * processed without interrupts being enabled.
       */
      void
      trap(struct trapframe *frame)
      {
               struct lwp *l = curlwp;
              struct proc *p;
              struct pcb *pcb;
              extern char kcopy_fault[];
              extern char IDTVEC(osyscall)[];
              extern char IDTVEC(syscall32)[];
              ksiginfo_t ksi;
              void *onfault;
              int type, error;
              uint64_t cr2;
              bool pfail;
      
              if (__predict_true(l != NULL)) {
                       pcb = lwp_getpcb(l);
                      p = l->l_proc;
              } else {
                      /*
                        * This can happen, e.g., on breakpoints early during boot.
                       */
                      pcb = NULL;
                      p = NULL;
              }
               type = frame->tf_trapno;
      
              if (!KERNELMODE(frame->tf_cs)) {
                      type |= T_USER;
                      l->l_md.md_regs = frame;
                      LWP_CACHE_CREDS(l, p);
              }
      
      #ifdef KDTRACE_HOOKS
              /*
               * A trap can occur while DTrace executes a probe. Before
               * executing the probe, DTrace blocks re-scheduling and sets
               * a flag in its per-cpu flags to indicate that it doesn't
               * want to fault. On returning from the probe, the no-fault
               * flag is cleared and finally re-scheduling is enabled.
               *
               * If the DTrace kernel module has registered a trap handler,
               * call it and if it returns non-zero, assume that it has
               * handled the trap and modified the trap frame so that this
               * function can return normally.
               */
               if ((type == T_PROTFLT || type == T_PAGEFLT) &&
                   dtrace_trap_func != NULL) {
                       if ((*dtrace_trap_func)(frame, type)) {
                              return;
                      }
              }
      #endif
      
              switch (type) {
      
              default:
              we_re_toast:
                      trap_print(frame, l);
      
                      if (kdb_trap(type, 0, frame))
                              return;
                      if (kgdb_trap(type, frame))
                              return;
                      /*
                       * If this is a breakpoint, don't panic if we're not connected.
                       */
                      if (type == T_BPTFLT && kgdb_disconnected()) {
                              printf("kgdb: ignored %s\n", trap_type[type]);
                              return;
                      }
                      panic("trap");
                      /*NOTREACHED*/
      
              case T_PROTFLT:
              case T_SEGNPFLT:
              case T_ALIGNFLT:
              case T_STKFLT:
              case T_TSSFLT:
                      if (p == NULL)
                              goto we_re_toast;
      
                      /* Check for copyin/copyout fault. */
                      onfault = onfault_handler(pcb, frame);
                      if (onfault != NULL) {
                              onfault_restore(frame, onfault, EFAULT);
                              return;
                      }
      
                      goto we_re_toast;
      
              case T_PROTFLT|T_USER:                /* protection fault */
      #if defined(COMPAT_NETBSD32) && defined(COMPAT_10)
      
      /*
        * XXX This code is currently not included in the loadable module;
        * it is only included when the support is built into the kernel.
       */
              {
                      static const char lcall[7] = { 0x9a, 0, 0, 0, 0, 7, 0 };
                      const size_t sz = sizeof(lcall);
                      char tmp[sz];
      
                      /* Check for the oosyscall lcall instruction. */
                      if (p->p_emul == &emul_netbsd32 &&
                          frame->tf_rip < VM_MAXUSER_ADDRESS32 - sz &&
                          copyin((void *)frame->tf_rip, tmp, sz) == 0 &&
                          memcmp(tmp, lcall, sz) == 0) {
      
                              /* Advance past the lcall. */
                              frame->tf_rip += sz;
      
                              /* Do the syscall. */
                              p->p_md.md_syscall(frame);
                              goto out;
                      }
              }
      #endif
                      /* FALLTHROUGH */
              case T_TSSFLT|T_USER:
              case T_SEGNPFLT|T_USER:
              case T_STKFLT|T_USER:
              case T_ALIGNFLT|T_USER:
                      KSI_INIT_TRAP(&ksi);
                      ksi.ksi_trap = type & ~T_USER;
                      ksi.ksi_addr = (void *)frame->tf_rip;
                      switch (type) {
                      case T_SEGNPFLT|T_USER:
                      case T_STKFLT|T_USER:
                              ksi.ksi_signo = SIGBUS;
                              ksi.ksi_code = BUS_ADRERR;
                              break;
                      case T_TSSFLT|T_USER:
                              ksi.ksi_signo = SIGBUS;
                              ksi.ksi_code = BUS_OBJERR;
                              break;
                      case T_ALIGNFLT|T_USER:
                              ksi.ksi_signo = SIGBUS;
                              ksi.ksi_code = BUS_ADRALN;
                              break;
                      case T_PROTFLT|T_USER:
                              ksi.ksi_signo = SIGSEGV;
                              ksi.ksi_code = SEGV_ACCERR;
                              break;
                      default:
                              KASSERT(0);
                              break;
                      }
                      goto trapsignal;
      
              case T_PRIVINFLT|T_USER:        /* privileged instruction fault */
              case T_FPOPFLT|T_USER:                /* coprocessor operand fault */
                      KSI_INIT_TRAP(&ksi);
                      ksi.ksi_signo = SIGILL;
                      ksi.ksi_trap = type & ~T_USER;
                      ksi.ksi_addr = (void *) frame->tf_rip;
                      switch (type) {
                      case T_PRIVINFLT|T_USER:
                              ksi.ksi_code = ILL_PRVOPC;
                              break;
                      case T_FPOPFLT|T_USER:
                              ksi.ksi_code = ILL_COPROC;
                              break;
                      default:
                              KASSERT(0);
                              break;
                      }
                      goto trapsignal;
      
              case T_ASTFLT|T_USER:
                      /* Allow process switch. */
                      //curcpu()->ci_data.cpu_nast++;
                      if (l->l_pflag & LP_OWEUPC) {
                              l->l_pflag &= ~LP_OWEUPC;
                              ADDUPROF(l);
                      }
                      /* Allow a forced task switch. */
                      if (curcpu()->ci_want_resched) {
                              preempt();
                      }
                      goto out;
      
              case T_BOUND|T_USER:
              case T_OFLOW|T_USER:
              case T_DIVIDE|T_USER:
                      KSI_INIT_TRAP(&ksi);
                      ksi.ksi_signo = SIGFPE;
                      ksi.ksi_trap = type & ~T_USER;
                      ksi.ksi_addr = (void *)frame->tf_rip;
                      switch (type) {
                      case T_BOUND|T_USER:
                              ksi.ksi_code = FPE_FLTSUB;
                              break;
                      case T_OFLOW|T_USER:
                              ksi.ksi_code = FPE_INTOVF;
                              break;
                      case T_DIVIDE|T_USER:
                              ksi.ksi_code = FPE_INTDIV;
                              break;
                      default:
      #ifdef DIAGNOSTIC
                              panic("unhandled type %x\n", type);
      #endif
                              break;
                      }
                      goto trapsignal;
      
              case T_PAGEFLT:
                      /* Allow page faults in kernel mode. */
                       if (__predict_false(l == NULL))
                              goto we_re_toast;
      
                      onfault = pcb->pcb_onfault;
      
                       if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) {
                              goto we_re_toast;
                      }
      
                       cr2 = rcr2();
      
                      if (frame->tf_err & PGEX_X) {
                              /* SMEP might have brought us here */
                              if (cr2 < VM_MAXUSER_ADDRESS) {
                                      printf("prevented execution of %p (SMEP)\n",
                                          (void *)cr2);
                                      goto we_re_toast;
                              }
                      }
      
                       if ((frame->tf_err & PGEX_P) &&
                          cr2 < VM_MAXUSER_ADDRESS) {
                              /* SMAP might have brought us here */
                              if (onfault_handler(pcb, frame) == NULL) {
                                      printf("prevented access to %p (SMAP)\n",
                                          (void *)cr2);
                                      goto we_re_toast;
                              }
                      }
      
                      goto faultcommon;
      
              case T_PAGEFLT|T_USER: {
                      register vaddr_t va;
                      register struct vmspace *vm;
                      register struct vm_map *map;
                      vm_prot_t ftype;
                      extern struct vm_map *kernel_map;
      
                      cr2 = rcr2();
                      if (p->p_emul->e_usertrap != NULL &&
                          (*p->p_emul->e_usertrap)(l, cr2, frame) != 0)
                              return;
       faultcommon:
                      vm = p->p_vmspace;
                      if (__predict_false(vm == NULL)) {
                              goto we_re_toast;
                      }
                       pcb->pcb_cr2 = cr2;
                      va = trunc_page((vaddr_t)cr2);
                      /*
                       * It is only a kernel address space fault iff:
                       *        1. (type & T_USER) == 0  and
                       *        2. pcb_onfault not set or
                       *        3. pcb_onfault set but supervisor space fault
                       * The last can occur during an exec() copyin where the
                       * argument space is lazy-allocated.
                       */
                      if (type == T_PAGEFLT && va >= VM_MIN_KERNEL_ADDRESS)
                              map = kernel_map;
                      else
                               map = &vm->vm_map;
                      if (frame->tf_err & PGEX_W)
                              ftype = VM_PROT_WRITE;
                       else if (frame->tf_err & PGEX_X)
                              ftype = VM_PROT_EXECUTE;
                      else
                              ftype = VM_PROT_READ;
      
      #ifdef DIAGNOSTIC
                       if (map == kernel_map && va == 0) {
                              printf("trap: bad kernel access at %lx\n", va);
                              goto we_re_toast;
                      }
      #endif
                      /* Fault the original page in. */
                       onfault = pcb->pcb_onfault;
                      pcb->pcb_onfault = NULL;
                      error = uvm_fault(map, va, ftype);
                      pcb->pcb_onfault = onfault;
                      if (error == 0) {
                               if (map != kernel_map && (void *)va >= vm->vm_maxsaddr)
                                      uvm_grow(p, va);
      
                              pfail = false;
                               while (type == T_PAGEFLT) {
                                       /*
                                        * We need to switch the pmap now if we're
                                        * in the middle of copyin/out.
                                        *
                                        * But we don't need to do so for kcopy, as
                                        * it never touches userspace.
                                        */
                                       kpreempt_disable();
                                      if (curcpu()->ci_want_pmapload) {
                                              onfault = onfault_handler(pcb, frame);
                                              if (onfault != kcopy_fault) {
                                                      pmap_load();
                                              }
                                      }
                                      /*
                                       * We need to keep the pmap loaded and
                                       * so avoid being preempted until back
                                       * into the copy functions.  Disable
                                       * interrupts at the hardware level before
                                       * re-enabling preemption.  Interrupts
                                       * will be re-enabled by 'iret' when
                                       * returning back out of the trap stub.
                                       * They'll only be re-enabled when the
                                       * program counter is once again in
                                       * the copy functions, and so visible
                                       * to cpu_kpreempt_exit().
                                       */
      #ifndef XENPV
                                       x86_disable_intr();
      #endif
                                      l->l_nopreempt--;
                                       if (l->l_nopreempt > 0 || !l->l_dopreempt ||
                                          pfail) {
                                              return;
                                      }
      #ifndef XENPV
                                      x86_enable_intr();
      #endif
                                      /*
                                       * If preemption fails for some reason,
                                       * don't retry it.  The conditions won't
                                       * change under our nose.
                                       */
                                      pfail = kpreempt(0);
                              }
                              goto out;
                      }
      
                       if (type == T_PAGEFLT) {
                               onfault = onfault_handler(pcb, frame);
                               if (onfault != NULL) {
                                       onfault_restore(frame, onfault, error);
                                      return;
                              }
      
                              printf("uvm_fault(%p, 0x%lx, %d) -> %x\n",
                                  map, va, ftype, error);
                              goto we_re_toast;
                      }
      
                      KSI_INIT_TRAP(&ksi);
                      ksi.ksi_trap = type & ~T_USER;
                      ksi.ksi_addr = (void *)cr2;
                      switch (error) {
                      case EINVAL:
                              ksi.ksi_signo = SIGBUS;
                              ksi.ksi_code = BUS_ADRERR;
                              break;
                      case EACCES:
                              ksi.ksi_signo = SIGSEGV;
                              ksi.ksi_code = SEGV_ACCERR;
                              error = EFAULT;
                               break;
                      case ENOMEM:
                              ksi.ksi_signo = SIGKILL;
                              printf("UVM: pid %d.%d (%s), uid %d killed: "
                                  "out of swap\n", p->p_pid, l->l_lid, p->p_comm,
                                  l->l_cred ?  kauth_cred_geteuid(l->l_cred) : -1);
                              break;
                      default:
                              ksi.ksi_signo = SIGSEGV;
                              ksi.ksi_code = SEGV_MAPERR;
                              break;
                      }
      
                      SIGDEBUG(frame, &ksi, error);
                       (*p->p_emul->e_trapsignal)(l, &ksi);
                      break;
              }
      
              case T_TRCTRAP:
                      /*
                       * Ignore debug register trace traps due to
                       * accesses in the user's address space, which
                       * can happen under several conditions such as
                       * if a user sets a watchpoint on a buffer and
                       * then passes that buffer to a system call.
                       * We still want to get TRCTRAPS for addresses
                       * in kernel space because that is useful when
                       * debugging the kernel.
                       */
                      if (x86_dbregs_user_trap())
                              break;
      
                      /* Check whether they single-stepped into a lcall. */
                      if (frame->tf_rip == (uint64_t)IDTVEC(osyscall) ||
                          frame->tf_rip == (uint64_t)IDTVEC(syscall32)) {
                              frame->tf_rflags &= ~PSL_T;
                              return;
                      }
                      goto we_re_toast;
      
              case T_BPTFLT|T_USER:                /* bpt instruction fault */
              case T_TRCTRAP|T_USER:                /* trace trap */
                      /*
                       * Don't go single-stepping into a RAS.
                       */
                      if (p->p_raslist == NULL ||
                          (ras_lookup(p, (void *)frame->tf_rip) == (void *)-1)) {
                              KSI_INIT_TRAP(&ksi);
                              ksi.ksi_signo = SIGTRAP;
                              ksi.ksi_trap = type & ~T_USER;
                              if (x86_dbregs_user_trap()) {
                                      x86_dbregs_store_dr6(l);
                                      ksi.ksi_code = TRAP_DBREG;
                              } else if (type == (T_BPTFLT|T_USER))
                                      ksi.ksi_code = TRAP_BRKPT;
                              else
                                      ksi.ksi_code = TRAP_TRACE;
                              (*p->p_emul->e_trapsignal)(l, &ksi);
                      }
                      break;
              }
      
              if ((type & T_USER) == 0)
                      return;
      out:
              userret(l);
              return;
      trapsignal:
              SIGDEBUG(frame, &ksi, 0);
              (*p->p_emul->e_trapsignal)(l, &ksi);
              userret(l);
      }
      
      /*
       * startlwp: start of a new LWP.
       */
      void
      startlwp(void *arg)
      {
              ucontext_t *uc = arg;
              lwp_t *l = curlwp;
              int error __diagused;
      
              error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
              KASSERT(error == 0);
      
              kmem_free(uc, sizeof(ucontext_t));
              userret(l);
      }
      
      #ifdef TRAP_SIGDEBUG
      static void
      frame_dump(const struct trapframe *tf, struct pcb *pcb)
      {
      
              printf("trapframe %p\n", tf);
              printf("rip %#018lx  rsp %#018lx  rfl %#018lx\n",
                  tf->tf_rip, tf->tf_rsp, tf->tf_rflags);
              printf("rdi %#018lx  rsi %#018lx  rdx %#018lx\n",
                  tf->tf_rdi, tf->tf_rsi, tf->tf_rdx);
              printf("rcx %#018lx  r8  %#018lx  r9  %#018lx\n",
                  tf->tf_rcx, tf->tf_r8, tf->tf_r9);
              printf("r10 %#018lx  r11 %#018lx  r12 %#018lx\n",
                  tf->tf_r10, tf->tf_r11, tf->tf_r12);
              printf("r13 %#018lx  r14 %#018lx  r15 %#018lx\n",
                  tf->tf_r13, tf->tf_r14, tf->tf_r15);
              printf("rbp %#018lx  rbx %#018lx  rax %#018lx\n",
                  tf->tf_rbp, tf->tf_rbx, tf->tf_rax);
              printf("cs %#04lx  ds %#04lx  es %#04lx  "
                  "fs %#04lx  gs %#04lx  ss %#04lx\n",
                  tf->tf_cs & 0xffff, tf->tf_ds & 0xffff, tf->tf_es & 0xffff,
                  tf->tf_fs & 0xffff, tf->tf_gs & 0xffff, tf->tf_ss & 0xffff);
              printf("fsbase %#018lx gsbase %#018lx\n", pcb->pcb_fs, pcb->pcb_gs);
              printf("\n");
              hexdump(printf, "Stack dump", tf, 256);
      }
      
      static void
      sigdebug(const struct trapframe *tf, const ksiginfo_t *ksi, int e)
      {
              struct lwp *l = curlwp;
              struct proc *p = l->l_proc;
      
              printf("pid %d.%d (%s): signal %d code=%d (trap %#lx) "
                  "@rip %#lx addr %#lx error=%d\n",
                  p->p_pid, l->l_lid, p->p_comm, ksi->ksi_signo, ksi->ksi_code,
                  tf->tf_trapno, tf->tf_rip, rcr2(), e);
              frame_dump(tf, lwp_getpcb(l));
      }
      #endif
      /*        $NetBSD: kern_cpu.c,v 1.75 2018/11/13 11:06:19 skrll Exp $        */
      
      /*-
       * Copyright (c) 2007, 2008, 2009, 2010, 2012 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*-
       * Copyright (c)2007 YAMAMOTO Takashi,
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.75 2018/11/13 11:06:19 skrll Exp $");
      
      #include "opt_cpu_ucode.h"
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/idle.h>
      #include <sys/sched.h>
      #include <sys/intr.h>
      #include <sys/conf.h>
      #include <sys/cpu.h>
      #include <sys/cpuio.h>
      #include <sys/proc.h>
      #include <sys/percpu.h>
      #include <sys/kernel.h>
      #include <sys/kauth.h>
      #include <sys/xcall.h>
      #include <sys/pool.h>
      #include <sys/kmem.h>
      #include <sys/select.h>
      #include <sys/namei.h>
      #include <sys/callout.h>
      #include <sys/pcu.h>
      
      #include <uvm/uvm_extern.h>
      
      #include "ioconf.h"
      
      /*
       * If the port has stated that cpu_data is the first thing in cpu_info,
       * verify that the claim is true. This will prevent them from getting out
       * of sync.
       */
      #ifdef __HAVE_CPU_DATA_FIRST
      CTASSERT(offsetof(struct cpu_info, ci_data) == 0);
      #else
      CTASSERT(offsetof(struct cpu_info, ci_data) != 0);
      #endif
      
      static void        cpu_xc_online(struct cpu_info *);
      static void        cpu_xc_offline(struct cpu_info *);
      
      dev_type_ioctl(cpuctl_ioctl);
      
      const struct cdevsw cpuctl_cdevsw = {
              .d_open = nullopen,
              .d_close = nullclose,
              .d_read = nullread,
              .d_write = nullwrite,
              .d_ioctl = cpuctl_ioctl,
              .d_stop = nullstop,
              .d_tty = notty,
              .d_poll = nopoll,
              .d_mmap = nommap,
              .d_kqfilter = nokqfilter,
              .d_discard = nodiscard,
              .d_flag = D_OTHER | D_MPSAFE
      };
      
      kmutex_t        cpu_lock                __cacheline_aligned;
      int                ncpu                        __read_mostly;
      int                ncpuonline                __read_mostly;
      bool                mp_online                __read_mostly;
      
      /* An array of CPUs.  There are ncpu entries. */
      struct cpu_info **cpu_infos                __read_mostly;
      
      /* Note: set on mi_cpu_attach() and idle_loop(). */
      kcpuset_t *        kcpuset_attached        __read_mostly        = NULL;
      kcpuset_t *        kcpuset_running                __read_mostly        = NULL;
      
      int (*compat_cpuctl_ioctl)(struct lwp *, u_long, void *) = (void *)enosys;
      
      static char cpu_model[128];
      
      /*
       * mi_cpu_init: early initialisation of MI CPU related structures.
       *
        * Note: may not block, and the memory allocator is not yet available.
       */
      void
      mi_cpu_init(void)
      {
      
              mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);
      
              kcpuset_create(&kcpuset_attached, true);
              kcpuset_create(&kcpuset_running, true);
              kcpuset_set(kcpuset_running, 0);
      }
      
      int
      mi_cpu_attach(struct cpu_info *ci)
      {
              int error;
      
              KASSERT(maxcpus > 0);
      
              ci->ci_index = ncpu;
              kcpuset_set(kcpuset_attached, cpu_index(ci));
      
              /*
               * Create a convenience cpuset of just ourselves.
               */
              kcpuset_create(&ci->ci_data.cpu_kcpuset, true);
              kcpuset_set(ci->ci_data.cpu_kcpuset, cpu_index(ci));
      
              TAILQ_INIT(&ci->ci_data.cpu_ld_locks);
              __cpu_simple_lock_init(&ci->ci_data.cpu_ld_lock);
      
               /* This is useful for, e.g., per-cpu evcnt. */
              snprintf(ci->ci_data.cpu_name, sizeof(ci->ci_data.cpu_name), "cpu%d",
                  cpu_index(ci));
      
              if (__predict_false(cpu_infos == NULL)) {
                      size_t ci_bufsize = (maxcpus + 1) * sizeof(struct cpu_info *);
                      cpu_infos = kmem_zalloc(ci_bufsize, KM_SLEEP);
              }
              cpu_infos[cpu_index(ci)] = ci;
      
              sched_cpuattach(ci);
      
              error = create_idle_lwp(ci);
              if (error != 0) {
                      /* XXX revert sched_cpuattach */
                      return error;
              }
      
              if (ci == curcpu())
                      ci->ci_data.cpu_onproc = curlwp;
              else
                      ci->ci_data.cpu_onproc = ci->ci_data.cpu_idlelwp;
      
              percpu_init_cpu(ci);
              softint_init(ci);
              callout_init_cpu(ci);
              xc_init_cpu(ci);
              pool_cache_cpu_init(ci);
              selsysinit(ci);
              cache_cpu_init(ci);
              TAILQ_INIT(&ci->ci_data.cpu_biodone);
              ncpu++;
              ncpuonline++;
      
              return 0;
      }
      
      void
      cpuctlattach(int dummy __unused)
      {
      
              KASSERT(cpu_infos != NULL);
      }
      
      int
      cpuctl_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
      {
              CPU_INFO_ITERATOR cii;
              cpustate_t *cs;
              struct cpu_info *ci;
              int error, i;
              u_int id;
      
              error = 0;
      
              mutex_enter(&cpu_lock);
              switch (cmd) {
              case IOC_CPU_SETSTATE:
                      cs = data;
                      error = kauth_authorize_system(l->l_cred,
                          KAUTH_SYSTEM_CPU, KAUTH_REQ_SYSTEM_CPU_SETSTATE, cs, NULL,
                          NULL);
                      if (error != 0)
                              break;
                      if (cs->cs_id >= maxcpus ||
                          (ci = cpu_lookup(cs->cs_id)) == NULL) {
                              error = ESRCH;
                              break;
                      }
                      cpu_setintr(ci, cs->cs_intr);
                      error = cpu_setstate(ci, cs->cs_online);
                      break;
      
              case IOC_CPU_GETSTATE:
                      cs = data;
                      id = cs->cs_id;
                      memset(cs, 0, sizeof(*cs));
                      cs->cs_id = id;
                      if (cs->cs_id >= maxcpus ||
                          (ci = cpu_lookup(id)) == NULL) {
                              error = ESRCH;
                              break;
                      }
                      if ((ci->ci_schedstate.spc_flags & SPCF_OFFLINE) != 0)
                              cs->cs_online = false;
                      else
                              cs->cs_online = true;
                      if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0)
                              cs->cs_intr = false;
                      else
                              cs->cs_intr = true;
                      cs->cs_lastmod = (int32_t)ci->ci_schedstate.spc_lastmod;
                      cs->cs_lastmodhi = (int32_t)
                          (ci->ci_schedstate.spc_lastmod >> 32);
                      cs->cs_intrcnt = cpu_intr_count(ci) + 1;
                      cs->cs_hwid = ci->ci_cpuid;
                      break;
      
              case IOC_CPU_MAPID:
                      i = 0;
                      for (CPU_INFO_FOREACH(cii, ci)) {
                              if (i++ == *(int *)data)
                                      break;
                      }
                      if (ci == NULL)
                              error = ESRCH;
                      else
                              *(int *)data = cpu_index(ci);
                      break;
      
              case IOC_CPU_GETCOUNT:
                      *(int *)data = ncpu;
                      break;
      
      #ifdef CPU_UCODE
              case IOC_CPU_UCODE_GET_VERSION:
                      error = cpu_ucode_get_version((struct cpu_ucode_version *)data);
                      break;
      
              case IOC_CPU_UCODE_APPLY:
                      error = kauth_authorize_machdep(l->l_cred,
                          KAUTH_MACHDEP_CPU_UCODE_APPLY,
                          NULL, NULL, NULL, NULL);
                      if (error != 0)
                              break;
                      error = cpu_ucode_apply((const struct cpu_ucode *)data);
                      break;
      #endif
      
              default:
                      error = (*compat_cpuctl_ioctl)(l, cmd, data);
                      break;
              }
              mutex_exit(&cpu_lock);
      
              return error;
      }
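
       /*
        * Illustrative sketch, not part of the original source: how a userland
        * consumer (normally cpuctl(8)) might query a CPU through this ioctl
        * interface.  The /dev/cpuctl path is an assumption here.
        *
        *      cpustate_t cs;
        *      int fd = open("/dev/cpuctl", O_RDONLY);
        *
        *      memset(&cs, 0, sizeof(cs));
        *      cs.cs_id = 0;
        *      if (fd != -1 && ioctl(fd, IOC_CPU_GETSTATE, &cs) == 0)
        *              printf("cpu%u: online=%d intr=%d\n",
        *                  (unsigned)cs.cs_id, (int)cs.cs_online, (int)cs.cs_intr);
        */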
      
      struct cpu_info *
      cpu_lookup(u_int idx)
      {
              struct cpu_info *ci;
      
              /*
               * cpu_infos is a NULL terminated array of MAXCPUS + 1 entries,
               * so an index of MAXCPUS here is ok.  See mi_cpu_attach.
               */
              KASSERT(idx <= maxcpus);
      
              if (__predict_false(cpu_infos == NULL)) {
                      KASSERT(idx == 0);
                      return curcpu();
              }
      
              ci = cpu_infos[idx];
              KASSERT(ci == NULL || cpu_index(ci) == idx);
              KASSERTMSG(idx < maxcpus || ci == NULL, "idx %d ci %p", idx, ci);
      
              return ci;
      }
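
       /*
        * Illustrative sketch, not part of the original source: callers that
        * walk the index space must tolerate NULL entries, since cpu_lookup()
        * can return NULL for an index that was never attached:
        *
        *      for (u_int i = 0; i < maxcpus; i++) {
        *              struct cpu_info *ci2 = cpu_lookup(i);
        *              if (ci2 == NULL)
        *                      continue;
        *              ... use ci2 ...
        *      }
        *
        * Most MI code uses the CPU_INFO_FOREACH() iterator instead.
        */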
      
      static void
      cpu_xc_offline(struct cpu_info *ci)
      {
              struct schedstate_percpu *spc, *mspc = NULL;
              struct cpu_info *target_ci;
              struct lwp *l;
              CPU_INFO_ITERATOR cii;
              int s;
      
              /*
               * Thread that made the cross call (separate context) holds
               * cpu_lock on our behalf.
               */
              spc = &ci->ci_schedstate;
              s = splsched();
              spc->spc_flags |= SPCF_OFFLINE;
              splx(s);
      
              /* Take the first available CPU for the migration. */
              for (CPU_INFO_FOREACH(cii, target_ci)) {
                      mspc = &target_ci->ci_schedstate;
                      if ((mspc->spc_flags & SPCF_OFFLINE) == 0)
                              break;
              }
              KASSERT(target_ci != NULL);
      
              /*
               * Migrate all non-bound threads to the other CPU.  Note that this
               * runs from the xcall thread, thus handling of LSONPROC is not needed.
               */
              mutex_enter(proc_lock);
              LIST_FOREACH(l, &alllwp, l_list) {
                      struct cpu_info *mci;
      
                      lwp_lock(l);
                      if (l->l_cpu != ci || (l->l_pflag & (LP_BOUND | LP_INTR))) {
                              lwp_unlock(l);
                              continue;
                      }
                      /* Regular case - no affinity. */
                      if (l->l_affinity == NULL) {
                              lwp_migrate(l, target_ci);
                              continue;
                      }
                      /* Affinity is set, find an online CPU in the set. */
                      for (CPU_INFO_FOREACH(cii, mci)) {
                              mspc = &mci->ci_schedstate;
                              if ((mspc->spc_flags & SPCF_OFFLINE) == 0 &&
                                  kcpuset_isset(l->l_affinity, cpu_index(mci)))
                                      break;
                      }
                      if (mci == NULL) {
                              lwp_unlock(l);
                              mutex_exit(proc_lock);
                              goto fail;
                      }
                      lwp_migrate(l, mci);
              }
              mutex_exit(proc_lock);
      
      #if PCU_UNIT_COUNT > 0
              pcu_save_all_on_cpu();
      #endif
      
      #ifdef __HAVE_MD_CPU_OFFLINE
              cpu_offline_md();
      #endif
              return;
      fail:
              /* Just unset the SPCF_OFFLINE flag, caller will check */
              s = splsched();
              spc->spc_flags &= ~SPCF_OFFLINE;
              splx(s);
      }
      
      static void
      cpu_xc_online(struct cpu_info *ci)
      {
              struct schedstate_percpu *spc;
              int s;
      
              spc = &ci->ci_schedstate;
              s = splsched();
              spc->spc_flags &= ~SPCF_OFFLINE;
              splx(s);
      }
      
      int
      cpu_setstate(struct cpu_info *ci, bool online)
      {
              struct schedstate_percpu *spc;
              CPU_INFO_ITERATOR cii;
              struct cpu_info *ci2;
              uint64_t where;
              xcfunc_t func;
              int nonline;
      
              spc = &ci->ci_schedstate;
      
              KASSERT(mutex_owned(&cpu_lock));
      
              if (online) {
                      if ((spc->spc_flags & SPCF_OFFLINE) == 0)
                              return 0;
                      func = (xcfunc_t)cpu_xc_online;
              } else {
                      if ((spc->spc_flags & SPCF_OFFLINE) != 0)
                              return 0;
                      nonline = 0;
                      /*
                       * Ensure that at least one CPU within the processor set
                       * stays online.  Revisit this later.
                       */
                      for (CPU_INFO_FOREACH(cii, ci2)) {
                              if ((ci2->ci_schedstate.spc_flags & SPCF_OFFLINE) != 0)
                                      continue;
                              if (ci2->ci_schedstate.spc_psid != spc->spc_psid)
                                      continue;
                              nonline++;
                      }
                      if (nonline == 1)
                              return EBUSY;
                      func = (xcfunc_t)cpu_xc_offline;
              }
      
              where = xc_unicast(0, func, ci, NULL, ci);
              xc_wait(where);
              if (online) {
                      KASSERT((spc->spc_flags & SPCF_OFFLINE) == 0);
                      ncpuonline++;
              } else {
                      if ((spc->spc_flags & SPCF_OFFLINE) == 0) {
                       /* If it was not set offline, then it is busy. */
                              return EBUSY;
                      }
                      ncpuonline--;
              }
      
              spc->spc_lastmod = time_second;
              return 0;
      }
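
       /*
        * Illustrative sketch, not part of the original source: cpu_setstate()
        * must be called with cpu_lock held (see the KASSERT above), as
        * cpuctl_ioctl() does.  Taking a CPU offline from kernel code would
        * look roughly like:
        *
        *      mutex_enter(&cpu_lock);
        *      error = cpu_setstate(ci, false);        false = offline, true = online
        *      mutex_exit(&cpu_lock);
        */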
      
      int
      cpu_setmodel(const char *fmt, ...)
      {
              int len;
              va_list ap;
      
              va_start(ap, fmt);
              len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
              va_end(ap);
              return len;
      }
      
      const char *
      cpu_getmodel(void)
      {
              return cpu_model;
      }
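
       /*
        * Illustrative sketch, not part of the original source: machine-dependent
        * identification code sets the model string once and MI code reads it
        * back.  The variable names below are placeholders.
        *
        *      cpu_setmodel("%s %s", vendor_str, brand_str);
        *      printf("cpu0: %s\n", cpu_getmodel());
        */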
      
      #ifdef __HAVE_INTR_CONTROL
      static void
      cpu_xc_intr(struct cpu_info *ci)
      {
              struct schedstate_percpu *spc;
              int s;
      
              spc = &ci->ci_schedstate;
              s = splsched();
              spc->spc_flags &= ~SPCF_NOINTR;
              splx(s);
      }
      
      static void
      cpu_xc_nointr(struct cpu_info *ci)
      {
              struct schedstate_percpu *spc;
              int s;
      
              spc = &ci->ci_schedstate;
              s = splsched();
              spc->spc_flags |= SPCF_NOINTR;
              splx(s);
      }
      
      int
      cpu_setintr(struct cpu_info *ci, bool intr)
      {
              struct schedstate_percpu *spc;
              CPU_INFO_ITERATOR cii;
              struct cpu_info *ci2;
              uint64_t where;
              xcfunc_t func;
              int nintr;
      
              spc = &ci->ci_schedstate;
      
              KASSERT(mutex_owned(&cpu_lock));
      
              if (intr) {
                      if ((spc->spc_flags & SPCF_NOINTR) == 0)
                              return 0;
                      func = (xcfunc_t)cpu_xc_intr;
              } else {
                      if ((spc->spc_flags & SPCF_NOINTR) != 0)
                              return 0;
                      /*
                       * Ensure that at least one CPU within the system
                        * is handling device interrupts.
                       */
                      nintr = 0;
                      for (CPU_INFO_FOREACH(cii, ci2)) {
                              if ((ci2->ci_schedstate.spc_flags & SPCF_NOINTR) != 0)
                                      continue;
                              if (ci2 == ci)
                                      continue;
                              nintr++;
                      }
                      if (nintr == 0)
                              return EBUSY;
                      func = (xcfunc_t)cpu_xc_nointr;
              }
      
              where = xc_unicast(0, func, ci, NULL, ci);
              xc_wait(where);
              if (intr) {
                      KASSERT((spc->spc_flags & SPCF_NOINTR) == 0);
              } else if ((spc->spc_flags & SPCF_NOINTR) == 0) {
                       /* If interrupt handling was not turned off, then it is busy. */
                      return EBUSY;
              }
      
              /* Direct interrupts away from the CPU and record the change. */
              cpu_intr_redistribute();
              spc->spc_lastmod = time_second;
              return 0;
      }
      #else        /* __HAVE_INTR_CONTROL */
      int
      cpu_setintr(struct cpu_info *ci, bool intr)
      {
      
              return EOPNOTSUPP;
      }
      
      u_int
      cpu_intr_count(struct cpu_info *ci)
      {
      
              return 0;        /* 0 == "don't know" */
      }
      #endif        /* __HAVE_INTR_CONTROL */
      
      bool
      cpu_softintr_p(void)
      {
      
               return (curlwp->l_pflag & LP_INTR) != 0;
      }
      
      #ifdef CPU_UCODE
      int
      cpu_ucode_load(struct cpu_ucode_softc *sc, const char *fwname)
      {
              firmware_handle_t fwh;
              int error;
      
              if (sc->sc_blob != NULL) {
                      firmware_free(sc->sc_blob, sc->sc_blobsize);
                      sc->sc_blob = NULL;
                      sc->sc_blobsize = 0;
              }
      
              error = cpu_ucode_md_open(&fwh, sc->loader_version, fwname);
              if (error != 0) {
                      aprint_error("ucode: firmware_open failed: %i\n", error);
                      goto err0;
              }
      
              sc->sc_blobsize = firmware_get_size(fwh);
              if (sc->sc_blobsize == 0) {
                      error = EFTYPE;
                      firmware_close(fwh);
                      goto err0;
              }
              sc->sc_blob = firmware_malloc(sc->sc_blobsize);
              if (sc->sc_blob == NULL) {
                      error = ENOMEM;
                      firmware_close(fwh);
                      goto err0;
              }
      
              error = firmware_read(fwh, 0, sc->sc_blob, sc->sc_blobsize);
              firmware_close(fwh);
              if (error != 0)
                      goto err1;
      
              return 0;
      
      err1:
              firmware_free(sc->sc_blob, sc->sc_blobsize);
              sc->sc_blob = NULL;
              sc->sc_blobsize = 0;
      err0:
              return error;
      }
      #endif
      /* $NetBSD: kern_auth.c,v 1.77 2018/09/03 16:29:35 riastradh Exp $ */
      
      /*-
       * Copyright (c) 2005, 2006 Elad Efrat <elad@NetBSD.org>
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. The name of the author may not be used to endorse or promote products
       *    derived from this software without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_auth.c,v 1.77 2018/09/03 16:29:35 riastradh Exp $");
      
      #include <sys/types.h>
      #include <sys/param.h>
      #include <sys/queue.h>
      #include <sys/proc.h>
      #include <sys/ucred.h>
      #include <sys/pool.h>
      #define __KAUTH_PRIVATE
      #include <sys/kauth.h>
      #include <sys/kmem.h>
      #include <sys/rwlock.h>
      #include <sys/sysctl.h>
      #include <sys/atomic.h>
      #include <sys/specificdata.h>
      #include <sys/vnode.h>
      
      #include <secmodel/secmodel.h>
      
      /*
       * Secmodel-specific credentials.
       */
      struct kauth_key {
              secmodel_t ks_secmodel;                /* secmodel */
              specificdata_key_t ks_key;        /* key */
      };
      
      
      /*
       * Listener.
       */
      struct kauth_listener {
              kauth_scope_callback_t                func;                /* callback */
              kauth_scope_t                        scope;                /* scope backpointer */
              u_int                                refcnt;                /* reference count */
              SIMPLEQ_ENTRY(kauth_listener)        listener_next;        /* listener list */
      };
      
      /*
       * Scope.
       */
      struct kauth_scope {
              const char                       *id;                /* scope name */
              void                               *cookie;                /* user cookie */
              u_int                                nlisteners;        /* # of listeners */
              SIMPLEQ_HEAD(, kauth_listener)        listenq;        /* listener list */
              SIMPLEQ_ENTRY(kauth_scope)        next_scope;        /* scope list */
      };
      
      static int kauth_cred_hook(kauth_cred_t, kauth_action_t, void *, void *);
      
      /* List of scopes and its lock. */
      static SIMPLEQ_HEAD(, kauth_scope) scope_list =
          SIMPLEQ_HEAD_INITIALIZER(scope_list);
      
/*
 * Built-in scopes: generic, system, process, network, machdep, device,
 * cred, vnode.
 */
      static kauth_scope_t kauth_builtin_scope_generic;
      static kauth_scope_t kauth_builtin_scope_system;
      static kauth_scope_t kauth_builtin_scope_process;
      static kauth_scope_t kauth_builtin_scope_network;
      static kauth_scope_t kauth_builtin_scope_machdep;
      static kauth_scope_t kauth_builtin_scope_device;
      static kauth_scope_t kauth_builtin_scope_cred;
      static kauth_scope_t kauth_builtin_scope_vnode;
      
      static specificdata_domain_t kauth_domain;
      static pool_cache_t kauth_cred_cache;
      
      krwlock_t        kauth_lock;
      
      /* Allocate new, empty kauth credentials. */
      kauth_cred_t
      kauth_cred_alloc(void)
      {
              kauth_cred_t cred;
      
	cred = pool_cache_get(kauth_cred_cache, PR_WAITOK);
      
              cred->cr_refcnt = 1;
              cred->cr_uid = 0;
              cred->cr_euid = 0;
              cred->cr_svuid = 0;
              cred->cr_gid = 0;
              cred->cr_egid = 0;
              cred->cr_svgid = 0;
              cred->cr_ngroups = 0;
      
              specificdata_init(kauth_domain, &cred->cr_sd);
              kauth_cred_hook(cred, KAUTH_CRED_INIT, NULL, NULL);
      
              return (cred);
      }
      
      /* Increment reference count to cred. */
      void
      kauth_cred_hold(kauth_cred_t cred)
      {
	KASSERT(cred != NULL);
	KASSERT(cred != NOCRED);
	KASSERT(cred != FSCRED);
	KASSERT(cred->cr_refcnt > 0);

	atomic_inc_uint(&cred->cr_refcnt);
      }
      
      /* Decrease reference count to cred. If reached zero, free it. */
      void
      kauth_cred_free(kauth_cred_t cred)
      {
      
	KASSERT(cred != NULL);
	KASSERT(cred != NOCRED);
	KASSERT(cred != FSCRED);
	KASSERT(cred->cr_refcnt > 0);
	ASSERT_SLEEPABLE();
      
              if (atomic_dec_uint_nv(&cred->cr_refcnt) > 0)
                      return;
      
              kauth_cred_hook(cred, KAUTH_CRED_FREE, NULL, NULL);
              specificdata_fini(kauth_domain, &cred->cr_sd);
              pool_cache_put(kauth_cred_cache, cred);
      }
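
/*
 * Usage sketch (not part of the original source; the function below is
 * hypothetical): the reference-counting discipline for credentials.
 */
#if 0
static void
example_cred_lifecycle(void)
{
	kauth_cred_t cred;

	cred = kauth_cred_alloc();	/* refcnt == 1, ids zeroed */
	kauth_cred_hold(cred);		/* refcnt == 2, e.g. when storing
					 * the credential in another object */
	kauth_cred_free(cred);		/* refcnt == 1 */
	kauth_cred_free(cred);		/* refcnt == 0: hook runs and the
					 * credential returns to the pool */
}
#endif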
      
      static void
      kauth_cred_clone1(kauth_cred_t from, kauth_cred_t to, bool copy_groups)
      {
	KASSERT(from != NULL);
	KASSERT(from != NOCRED);
	KASSERT(from != FSCRED);
	KASSERT(to != NULL);
	KASSERT(to != NOCRED);
	KASSERT(to != FSCRED);
	KASSERT(from->cr_refcnt > 0);

	to->cr_uid = from->cr_uid;
              to->cr_euid = from->cr_euid;
              to->cr_svuid = from->cr_svuid;
              to->cr_gid = from->cr_gid;
              to->cr_egid = from->cr_egid;
              to->cr_svgid = from->cr_svgid;
              if (copy_groups) {
                      to->cr_ngroups = from->cr_ngroups;
                      memcpy(to->cr_groups, from->cr_groups, sizeof(to->cr_groups));
              }
      
	kauth_cred_hook(from, KAUTH_CRED_COPY, to, NULL);
      }
      
      void
      kauth_cred_clone(kauth_cred_t from, kauth_cred_t to)
      {
              kauth_cred_clone1(from, to, true);
      }
      
      /*
       * Duplicate cred and return a new kauth_cred_t.
       */
      kauth_cred_t
      kauth_cred_dup(kauth_cred_t cred)
      {
              kauth_cred_t new_cred;
      
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt > 0);
      
              new_cred = kauth_cred_alloc();
      
              kauth_cred_clone(cred, new_cred);
      
              return (new_cred);
      }
      
      /*
       * Similar to crcopy(), only on a kauth_cred_t.
       * XXX: Is this even needed? [kauth_cred_copy]
       */
      kauth_cred_t
      kauth_cred_copy(kauth_cred_t cred)
      {
              kauth_cred_t new_cred;
      
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt > 0);
      
              /* If the provided credentials already have one reference, use them. */
              if (cred->cr_refcnt == 1)
                      return (cred);
      
              new_cred = kauth_cred_alloc();
      
              kauth_cred_clone(cred, new_cred);
      
              kauth_cred_free(cred);
      
              return (new_cred);
      }
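
/*
 * Usage sketch (not part of the original source; the example_* name is
 * hypothetical): kauth_cred_dup() always creates a new credential, while
 * kauth_cred_copy() returns its argument unchanged when it holds the only
 * reference, and otherwise returns a private duplicate and drops the
 * caller's reference on the shared one.
 */
#if 0
static kauth_cred_t
example_unshare(kauth_cred_t cred)
{

	/* Safe to modify the result without affecting other holders. */
	return kauth_cred_copy(cred);
}
#endif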
      
      void
      kauth_proc_fork(struct proc *parent, struct proc *child)
      {
      
              mutex_enter(parent->p_lock);
              kauth_cred_hold(parent->p_cred);
              child->p_cred = parent->p_cred;
              mutex_exit(parent->p_lock);
      
              /* XXX: relies on parent process stalling during fork() */
              kauth_cred_hook(parent->p_cred, KAUTH_CRED_FORK, parent,
                  child);
      }
      
      void
      kauth_proc_chroot(kauth_cred_t cred, struct cwdinfo *cwdi)
      {
              kauth_cred_hook(cred, KAUTH_CRED_CHROOT, cwdi, NULL);
      }
      
      uid_t
      kauth_cred_getuid(kauth_cred_t cred)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
      
              return (cred->cr_uid);
      }
      
      uid_t
      kauth_cred_geteuid(kauth_cred_t cred)
      {
	KASSERT(cred != NULL);
	KASSERT(cred != NOCRED);
	KASSERT(cred != FSCRED);

	return (cred->cr_euid);
      }
      
      uid_t
      kauth_cred_getsvuid(kauth_cred_t cred)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
      
              return (cred->cr_svuid);
      }
      
      gid_t
      kauth_cred_getgid(kauth_cred_t cred)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
      
              return (cred->cr_gid);
      }
      
      gid_t
      kauth_cred_getegid(kauth_cred_t cred)
      {
	KASSERT(cred != NULL);
	KASSERT(cred != NOCRED);
	KASSERT(cred != FSCRED);

	return (cred->cr_egid);
      }
      
      gid_t
      kauth_cred_getsvgid(kauth_cred_t cred)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
      
              return (cred->cr_svgid);
      }
      
      void
      kauth_cred_setuid(kauth_cred_t cred, uid_t uid)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt == 1);
      
              cred->cr_uid = uid;
      }
      
      void
      kauth_cred_seteuid(kauth_cred_t cred, uid_t uid)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt == 1);
      
              cred->cr_euid = uid;
      }
      
      void
      kauth_cred_setsvuid(kauth_cred_t cred, uid_t uid)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt == 1);
      
              cred->cr_svuid = uid;
      }
      
      void
      kauth_cred_setgid(kauth_cred_t cred, gid_t gid)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt == 1);
      
              cred->cr_gid = gid;
      }
      
      void
      kauth_cred_setegid(kauth_cred_t cred, gid_t gid)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt == 1);
      
              cred->cr_egid = gid;
      }
      
      void
      kauth_cred_setsvgid(kauth_cred_t cred, gid_t gid)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(cred->cr_refcnt == 1);
      
              cred->cr_svgid = gid;
      }
      
      /* Checks if gid is a member of the groups in cred. */
      int
      kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp)
      {
              uint32_t i;
      
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(resultp != NULL);
      
              *resultp = 0;
      
              for (i = 0; i < cred->cr_ngroups; i++)
                      if (cred->cr_groups[i] == gid) {
                              *resultp = 1;
                              break;
                      }
      
              return (0);
      }
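
/*
 * Usage sketch (not part of the original source; the function and the gid
 * argument are hypothetical): testing group membership of the current
 * credentials.
 */
#if 0
static bool
example_in_group(gid_t gid)
{
	int ismember = 0;

	/* Always returns 0; the answer comes back through ismember. */
	(void)kauth_cred_ismember_gid(kauth_cred_get(), gid, &ismember);
	return ismember != 0;
}
#endif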
      
      u_int
      kauth_cred_ngroups(kauth_cred_t cred)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
      
              return (cred->cr_ngroups);
      }
      
      /*
       * Return the group at index idx from the groups in cred.
       */
      gid_t
      kauth_cred_group(kauth_cred_t cred, u_int idx)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(idx < cred->cr_ngroups);
      
              return (cred->cr_groups[idx]);
      }
      
      /* XXX elad: gmuid is unused for now. */
      int
      kauth_cred_setgroups(kauth_cred_t cred, const gid_t *grbuf, size_t len,
          uid_t gmuid, enum uio_seg seg)
      {
              int error = 0;
      
	KASSERT(cred != NULL);
	KASSERT(cred != NOCRED);
	KASSERT(cred != FSCRED);
	KASSERT(cred->cr_refcnt == 1);

	if (len > __arraycount(cred->cr_groups))
		return EINVAL;
      
              if (len) {
                      if (seg == UIO_SYSSPACE) {
                              memcpy(cred->cr_groups, grbuf,
                                  len * sizeof(cred->cr_groups[0]));
                      } else {
			error = copyin(grbuf, cred->cr_groups,
                                  len * sizeof(cred->cr_groups[0]));
                              if (error != 0)
                                      len = 0;
                      }
              }
	memset(cred->cr_groups + len, 0xff,
	    sizeof(cred->cr_groups) - (len * sizeof(cred->cr_groups[0])));

	cred->cr_ngroups = len;

	return error;
}
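
/*
 * Usage sketch (not part of the original source; the function and group
 * values are hypothetical): installing a kernel-resident group list on a
 * credential that has a single reference.  gmuid is currently unused, so
 * -1 is passed.
 */
#if 0
static int
example_set_groups(kauth_cred_t cred)
{
	static const gid_t groups[] = { 0, 5, 100 };

	return kauth_cred_setgroups(cred, groups, __arraycount(groups), -1,
	    UIO_SYSSPACE);
}
#endif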
      
      /* This supports sys_setgroups() */
      int
      kauth_proc_setgroups(struct lwp *l, kauth_cred_t ncred)
      {
              kauth_cred_t cred;
              int error;
      
              /*
               * At this point we could delete duplicate groups from ncred,
	 * and plausibly sort the list - but in general the latter is
	 * a bad idea.
               */
	proc_crmod_enter();
              /* Maybe we should use curproc here ? */
              cred = l->l_proc->p_cred;
      
              kauth_cred_clone1(cred, ncred, false);
      
              error = kauth_authorize_process(cred, KAUTH_PROCESS_SETID,
                  l->l_proc, NULL, NULL, NULL);
              if (error != 0) {
                      proc_crmod_leave(cred, ncred, false);
		return error;
              }
      
              /* Broadcast our credentials to the process and other LWPs. */
	proc_crmod_leave(ncred, cred, true);
              return 0;
      }
      
      int
      kauth_cred_getgroups(kauth_cred_t cred, gid_t *grbuf, size_t len,
          enum uio_seg seg)
      {
              KASSERT(cred != NULL);
      
              if (len > cred->cr_ngroups)
                      return EINVAL;
      
              if (seg == UIO_USERSPACE)
                      return copyout(cred->cr_groups, grbuf, sizeof(*grbuf) * len);
              memcpy(grbuf, cred->cr_groups, sizeof(*grbuf) * len);
      
              return 0;
      }
      
      int
      kauth_register_key(secmodel_t secmodel, kauth_key_t *result)
      {
              kauth_key_t k;
              specificdata_key_t key;
              int error;
      
              KASSERT(result != NULL);
      
              error = specificdata_key_create(kauth_domain, &key, NULL);
              if (error)
                      return (error);
      
              k = kmem_alloc(sizeof(*k), KM_SLEEP);
              k->ks_secmodel = secmodel;
              k->ks_key = key;
      
              *result = k;
      
              return (0);
      }
      
      int
      kauth_deregister_key(kauth_key_t key)
      {
              KASSERT(key != NULL);
      
              specificdata_key_delete(kauth_domain, key->ks_key);
              kmem_free(key, sizeof(*key));
      
              return (0);
      }
      
      void *
      kauth_cred_getdata(kauth_cred_t cred, kauth_key_t key)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(key != NULL);
      
              return (specificdata_getspecific(kauth_domain, &cred->cr_sd,
                  key->ks_key));
      }
      
      void
      kauth_cred_setdata(kauth_cred_t cred, kauth_key_t key, void *data)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(key != NULL);
      
              specificdata_setspecific(kauth_domain, &cred->cr_sd, key->ks_key, data);
      }
      
      /*
       * Match uids in two credentials.
       */
      int
      kauth_cred_uidmatch(kauth_cred_t cred1, kauth_cred_t cred2)
      {
              KASSERT(cred1 != NULL);
              KASSERT(cred1 != NOCRED);
              KASSERT(cred1 != FSCRED);
              KASSERT(cred2 != NULL);
              KASSERT(cred2 != NOCRED);
              KASSERT(cred2 != FSCRED);
      
              if (cred1->cr_uid == cred2->cr_uid ||
                  cred1->cr_euid == cred2->cr_uid ||
                  cred1->cr_uid == cred2->cr_euid ||
                  cred1->cr_euid == cred2->cr_euid)
                      return (1);
      
              return (0);
      }
      
      u_int
      kauth_cred_getrefcnt(kauth_cred_t cred)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
      
              return (cred->cr_refcnt);
      }
      
      /*
       * Convert userland credentials (struct uucred) to kauth_cred_t.
       * XXX: For NFS & puffs
       */
      void    
      kauth_uucred_to_cred(kauth_cred_t cred, const struct uucred *uuc)
      {       
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(uuc != NULL);
       
              cred->cr_refcnt = 1;
              cred->cr_uid = uuc->cr_uid;
              cred->cr_euid = uuc->cr_uid;
              cred->cr_svuid = uuc->cr_uid;
              cred->cr_gid = uuc->cr_gid;
              cred->cr_egid = uuc->cr_gid;
              cred->cr_svgid = uuc->cr_gid;
              cred->cr_ngroups = uimin(uuc->cr_ngroups, NGROUPS);
              kauth_cred_setgroups(cred, __UNCONST(uuc->cr_groups),
                  cred->cr_ngroups, -1, UIO_SYSSPACE);
      }
      
      /*
       * Convert kauth_cred_t to userland credentials (struct uucred).
       * XXX: For NFS & puffs
       */
      void    
      kauth_cred_to_uucred(struct uucred *uuc, const kauth_cred_t cred)
      {       
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(uuc != NULL);
              int ng;
      
              ng = uimin(cred->cr_ngroups, NGROUPS);
              uuc->cr_uid = cred->cr_euid;  
              uuc->cr_gid = cred->cr_egid;  
              uuc->cr_ngroups = ng;
              kauth_cred_getgroups(cred, uuc->cr_groups, ng, UIO_SYSSPACE);
      }
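
/*
 * Usage sketch (not part of the original source; the function is
 * hypothetical): round-tripping credentials through struct uucred, roughly
 * as the NFS and puffs code does.
 */
#if 0
static kauth_cred_t
example_uucred_roundtrip(kauth_cred_t cred)
{
	struct uucred uuc;
	kauth_cred_t new_cred;

	kauth_cred_to_uucred(&uuc, cred);	/* effective ids + groups */

	new_cred = kauth_cred_alloc();
	kauth_uucred_to_cred(new_cred, &uuc);	/* real = effective = saved */
	return new_cred;
}
#endif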
      
      /*
       * Compare kauth_cred_t and uucred credentials.
       * XXX: Modelled after crcmp() for NFS.
       */
      int
      kauth_cred_uucmp(kauth_cred_t cred, const struct uucred *uuc)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(uuc != NULL);
      
              if (cred->cr_euid == uuc->cr_uid &&
                  cred->cr_egid == uuc->cr_gid &&
                  cred->cr_ngroups == (uint32_t)uuc->cr_ngroups) {
                      int i;
      
                      /* Check if all groups from uuc appear in cred. */
                      for (i = 0; i < uuc->cr_ngroups; i++) {
                              int ismember;
      
                              ismember = 0;
                              if (kauth_cred_ismember_gid(cred, uuc->cr_groups[i],
                                  &ismember) != 0 || !ismember)
                                      return (1);
                      }
      
                      return (0);
              }
      
              return (1);
      }
      
      /*
       * Make a struct ucred out of a kauth_cred_t.  For compatibility.
       */
      void
      kauth_cred_toucred(kauth_cred_t cred, struct ki_ucred *uc)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(uc != NULL);
      
              uc->cr_ref = cred->cr_refcnt;
              uc->cr_uid = cred->cr_euid;
              uc->cr_gid = cred->cr_egid;
              uc->cr_ngroups = uimin(cred->cr_ngroups, __arraycount(uc->cr_groups));
              memcpy(uc->cr_groups, cred->cr_groups,
                     uc->cr_ngroups * sizeof(uc->cr_groups[0]));
      }
      
      /*
       * Make a struct pcred out of a kauth_cred_t.  For compatibility.
       */
      void
      kauth_cred_topcred(kauth_cred_t cred, struct ki_pcred *pc)
      {
              KASSERT(cred != NULL);
              KASSERT(cred != NOCRED);
              KASSERT(cred != FSCRED);
              KASSERT(pc != NULL);
      
              pc->p_pad = NULL;
              pc->p_ruid = cred->cr_uid;
              pc->p_svuid = cred->cr_svuid;
              pc->p_rgid = cred->cr_gid;
              pc->p_svgid = cred->cr_svgid;
              pc->p_refcnt = cred->cr_refcnt;
      }
      
      /*
       * Return kauth_cred_t for the current LWP.
       */
      kauth_cred_t
      kauth_cred_get(void)
      {
              return (curlwp->l_cred);
      }
      
      /*
       * Returns a scope matching the provided id.
       * Requires the scope list lock to be held by the caller.
       */
      static kauth_scope_t
      kauth_ifindscope(const char *id)
      {
              kauth_scope_t scope;
      
              KASSERT(rw_lock_held(&kauth_lock));
      
              scope = NULL;
              SIMPLEQ_FOREACH(scope, &scope_list, next_scope) {
                      if (strcmp(scope->id, id) == 0)
                              break;
              }
      
              return (scope);
      }
      
      /*
       * Register a new scope.
       *
       * id - identifier for the scope
       * callback - the scope's default listener
       * cookie - cookie to be passed to the listener(s)
       */
      kauth_scope_t
      kauth_register_scope(const char *id, kauth_scope_callback_t callback,
          void *cookie)
      {
              kauth_scope_t scope;
              kauth_listener_t listener = NULL; /* XXX gcc */
      
              /* Sanitize input */
              if (id == NULL)
                      return (NULL);
      
              /* Allocate space for a new scope and listener. */
              scope = kmem_alloc(sizeof(*scope), KM_SLEEP);
              if (callback != NULL)
                      listener = kmem_alloc(sizeof(*listener), KM_SLEEP);
      
              /*
               * Acquire scope list lock.
               */
              rw_enter(&kauth_lock, RW_WRITER);
      
              /* Check we don't already have a scope with the same id */
              if (kauth_ifindscope(id) != NULL) {
                      rw_exit(&kauth_lock);
      
                      kmem_free(scope, sizeof(*scope));
                      if (callback != NULL)
                              kmem_free(listener, sizeof(*listener));
      
                      return (NULL);
              }
      
              /* Initialize new scope with parameters */
              scope->id = id;
              scope->cookie = cookie;
              scope->nlisteners = 1;
      
              SIMPLEQ_INIT(&scope->listenq);
      
              /* Add default listener */
              if (callback != NULL) {
                      listener->func = callback;
                      listener->scope = scope;
                      listener->refcnt = 0;
                      SIMPLEQ_INSERT_HEAD(&scope->listenq, listener, listener_next);
              }
      
              /* Insert scope to scopes list */
              SIMPLEQ_INSERT_TAIL(&scope_list, scope, next_scope);
      
              rw_exit(&kauth_lock);
      
              return (scope);
      }
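
/*
 * Usage sketch (not part of the original source; the scope name and the
 * example_* names are hypothetical): registering a new scope with a
 * default listener that defers every decision.
 */
#if 0
static kauth_scope_t example_scope;

static int
example_default_listener(kauth_cred_t cred, kauth_action_t action,
    void *cookie, void *arg0, void *arg1, void *arg2, void *arg3)
{

	return KAUTH_RESULT_DEFER;
}

static void
example_register(void)
{

	example_scope = kauth_register_scope("org.example.dummy",
	    example_default_listener, NULL);
}
#endif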
      
      /*
       * Initialize the kernel authorization subsystem.
       *
       * Initialize the scopes list lock.
       * Create specificdata domain.
       * Register the credentials scope, used in kauth(9) internally.
 * Register built-in scopes: generic, system, process, network, machdep,
 * device, vnode.
       */
      void
      kauth_init(void)
      {
              rw_init(&kauth_lock);
      
              kauth_cred_cache = pool_cache_init(sizeof(struct kauth_cred),
                  coherency_unit, 0, 0, "kcredpl", NULL, IPL_NONE,
                  NULL, NULL, NULL);
      
              /* Create specificdata domain. */
              kauth_domain = specificdata_domain_create();
      
              /* Register credentials scope. */
              kauth_builtin_scope_cred =
                  kauth_register_scope(KAUTH_SCOPE_CRED, NULL, NULL);
      
              /* Register generic scope. */
              kauth_builtin_scope_generic = kauth_register_scope(KAUTH_SCOPE_GENERIC,
                  NULL, NULL);
      
              /* Register system scope. */
              kauth_builtin_scope_system = kauth_register_scope(KAUTH_SCOPE_SYSTEM,
                  NULL, NULL);
      
              /* Register process scope. */
              kauth_builtin_scope_process = kauth_register_scope(KAUTH_SCOPE_PROCESS,
                  NULL, NULL);
      
              /* Register network scope. */
              kauth_builtin_scope_network = kauth_register_scope(KAUTH_SCOPE_NETWORK,
                  NULL, NULL);
      
              /* Register machdep scope. */
              kauth_builtin_scope_machdep = kauth_register_scope(KAUTH_SCOPE_MACHDEP,
                  NULL, NULL);
      
              /* Register device scope. */
              kauth_builtin_scope_device = kauth_register_scope(KAUTH_SCOPE_DEVICE,
                  NULL, NULL);
      
              /* Register vnode scope. */
              kauth_builtin_scope_vnode = kauth_register_scope(KAUTH_SCOPE_VNODE,
                  NULL, NULL);
      }
      
      /*
       * Deregister a scope.
       * Requires scope list lock to be held by the caller.
       *
       * scope - the scope to deregister
       */
      void
      kauth_deregister_scope(kauth_scope_t scope)
      {
              if (scope != NULL) {
                      /* Remove scope from list */
                      SIMPLEQ_REMOVE(&scope_list, scope, kauth_scope, next_scope);
                      kmem_free(scope, sizeof(*scope));
              }
      }
      
      /*
       * Register a listener.
       *
       * id - scope identifier.
       * callback - the callback routine for the listener.
 * cookie - cookie to pass unmodified to the callback.
       */
      kauth_listener_t
      kauth_listen_scope(const char *id, kauth_scope_callback_t callback,
         void *cookie)
      {
              kauth_scope_t scope;
              kauth_listener_t listener;
      
              listener = kmem_alloc(sizeof(*listener), KM_SLEEP);
              rw_enter(&kauth_lock, RW_WRITER);
      
              /*
               * Find scope struct.
               */
              scope = kauth_ifindscope(id);
              if (scope == NULL) {
                      rw_exit(&kauth_lock);
                      kmem_free(listener, sizeof(*listener));
                      return (NULL);
              }
      
              /* Initialize listener with parameters */
              listener->func = callback;
              listener->refcnt = 0;
      
              /* Add listener to scope */
              SIMPLEQ_INSERT_TAIL(&scope->listenq, listener, listener_next);
      
              /* Raise number of listeners on scope. */
              scope->nlisteners++;
              listener->scope = scope;
      
              rw_exit(&kauth_lock);
      
              return (listener);
      }
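
/*
 * Usage sketch (not part of the original source; the example_* names are
 * hypothetical): attaching and detaching a listener on the built-in
 * process scope.  ktrace's ktrace_listener_cb() (kern_ktrace.c) is a real
 * in-tree user of this interface.
 */
#if 0
static int
example_process_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{

	return KAUTH_RESULT_DEFER;	/* leave the decision to others */
}

static void
example_listen(void)
{
	kauth_listener_t l;

	l = kauth_listen_scope(KAUTH_SCOPE_PROCESS, example_process_cb, NULL);
	if (l != NULL)
		kauth_unlisten_scope(l);
}
#endif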
      
      /*
       * Deregister a listener.
       *
       * listener - listener reference as returned from kauth_listen_scope().
       */
      void
      kauth_unlisten_scope(kauth_listener_t listener)
      {
      
              if (listener != NULL) {
                      rw_enter(&kauth_lock, RW_WRITER);
                      SIMPLEQ_REMOVE(&listener->scope->listenq, listener,
                          kauth_listener, listener_next);
                      listener->scope->nlisteners--;
                      rw_exit(&kauth_lock);
                      kmem_free(listener, sizeof(*listener));
              }
      }
      
      /*
       * Authorize a request.
       *
       * scope - the scope of the request as defined by KAUTH_SCOPE_* or as
       *           returned from kauth_register_scope().
       * credential - credentials of the user ("actor") making the request.
       * action - request identifier.
       * arg[0-3] - passed unmodified to listener(s).
       *
       * Returns the aggregated result:
       *     - KAUTH_RESULT_ALLOW if there is at least one KAUTH_RESULT_ALLOW and
 *       zero KAUTH_RESULT_DENY
       *     - KAUTH_RESULT_DENY if there is at least one KAUTH_RESULT_DENY
       *     - KAUTH_RESULT_DEFER if there is nothing but KAUTH_RESULT_DEFER
       */
      static int
      kauth_authorize_action_internal(kauth_scope_t scope, kauth_cred_t cred,
          kauth_action_t action, void *arg0, void *arg1, void *arg2, void *arg3)
      {
              kauth_listener_t listener;
              int error, allow, fail;
      
	KASSERT(cred != NULL);
	KASSERT(action != 0);

	/* Short-circuit requests coming from the kernel. */
	if (cred == NOCRED || cred == FSCRED)
		return KAUTH_RESULT_ALLOW;

	KASSERT(scope != NULL);
      
              fail = 0;
              allow = 0;
      
              /* rw_enter(&kauth_lock, RW_READER); XXX not yet */
	SIMPLEQ_FOREACH(listener, &scope->listenq, listener_next) {
		error = listener->func(cred, action, scope->cookie, arg0,
                          arg1, arg2, arg3);
      
                      if (error == KAUTH_RESULT_ALLOW)
                              allow = 1;
		else if (error == KAUTH_RESULT_DENY)
                              fail = 1;
              }
              /* rw_exit(&kauth_lock); */
      
	if (fail)
		return (KAUTH_RESULT_DENY);

	if (allow)
                      return (KAUTH_RESULT_ALLOW);
      
              return (KAUTH_RESULT_DEFER);
}
      
      int
      kauth_authorize_action(kauth_scope_t scope, kauth_cred_t cred,
          kauth_action_t action, void *arg0, void *arg1, void *arg2, void *arg3)
      {
              int r;
      
	r = kauth_authorize_action_internal(scope, cred, action, arg0, arg1,
                  arg2, arg3);
      
              if (r == KAUTH_RESULT_DENY)
                      return (EPERM);
      
	if (r == KAUTH_RESULT_ALLOW)
                      return (0);
      
	if (secmodel_nsecmodels() == 0)
                      return (0);
      
              return (EPERM);
      }
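
/*
 * Usage sketch (not part of the original source; the action value and the
 * example_* name are hypothetical): requesting authorization and acting on
 * the errno-style result.  One DENY from any listener yields EPERM; one
 * ALLOW with no DENY yields 0; all-DEFER yields EPERM unless no secmodel
 * is loaded.
 */
#if 0
static int
example_authorize(kauth_scope_t scope, void *obj)
{
	int error;

	error = kauth_authorize_action(scope, kauth_cred_get(),
	    1 /* hypothetical action */, obj, NULL, NULL, NULL);
	if (error)
		return error;	/* EPERM */
	/* ... perform the privileged operation ... */
	return 0;
}
#endif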
      
      /*
       * Generic scope authorization wrapper.
       */
      int
      kauth_authorize_generic(kauth_cred_t cred, kauth_action_t action, void *arg0)
      {
              return (kauth_authorize_action(kauth_builtin_scope_generic, cred, 
                  action, arg0, NULL, NULL, NULL));
      }
      
      /*
       * System scope authorization wrapper.
       */
      int
      kauth_authorize_system(kauth_cred_t cred, kauth_action_t action,
          enum kauth_system_req req, void *arg1, void *arg2, void *arg3)
      {
              return (kauth_authorize_action(kauth_builtin_scope_system, cred,
                  action, (void *)req, arg1, arg2, arg3));
      }
      
      /*
       * Process scope authorization wrapper.
       */
      int
      kauth_authorize_process(kauth_cred_t cred, kauth_action_t action,
          struct proc *p, void *arg1, void *arg2, void *arg3)
      {
              return (kauth_authorize_action(kauth_builtin_scope_process, cred,
                  action, p, arg1, arg2, arg3));
      }
      
      /*
       * Network scope authorization wrapper.
       */
      int
      kauth_authorize_network(kauth_cred_t cred, kauth_action_t action,
          enum kauth_network_req req, void *arg1, void *arg2, void *arg3)
      {
              return (kauth_authorize_action(kauth_builtin_scope_network, cred,
                  action, (void *)req, arg1, arg2, arg3));
      }
      
      int
      kauth_authorize_machdep(kauth_cred_t cred, kauth_action_t action,
          void *arg0, void *arg1, void *arg2, void *arg3)
      {
              return (kauth_authorize_action(kauth_builtin_scope_machdep, cred,
                  action, arg0, arg1, arg2, arg3));
      }
      
      int
      kauth_authorize_device(kauth_cred_t cred, kauth_action_t action,
          void *arg0, void *arg1, void *arg2, void *arg3)
      {
              return (kauth_authorize_action(kauth_builtin_scope_device, cred,
                  action, arg0, arg1, arg2, arg3));
      }
      
      int
      kauth_authorize_device_tty(kauth_cred_t cred, kauth_action_t action,
          struct tty *tty)
      {
              return (kauth_authorize_action(kauth_builtin_scope_device, cred,
                  action, tty, NULL, NULL, NULL));
      }
      
      int
      kauth_authorize_device_spec(kauth_cred_t cred, enum kauth_device_req req,
          struct vnode *vp)
      {
              return (kauth_authorize_action(kauth_builtin_scope_device, cred,
                  KAUTH_DEVICE_RAWIO_SPEC, (void *)req, vp, NULL, NULL));
      }
      
      int
      kauth_authorize_device_passthru(kauth_cred_t cred, dev_t dev, u_long bits,
          void *data)
      {
              return (kauth_authorize_action(kauth_builtin_scope_device, cred,
                  KAUTH_DEVICE_RAWIO_PASSTHRU, (void *)bits, (void *)(u_long)dev,
                  data, NULL));
      }
      
      kauth_action_t
      kauth_mode_to_action(mode_t mode)
      {
              kauth_action_t action = 0;
      
              if (mode & VREAD)
                      action |= KAUTH_VNODE_READ_DATA;
              if (mode & VWRITE)
                      action |= KAUTH_VNODE_WRITE_DATA;
              if (mode & VEXEC)
                      action |= KAUTH_VNODE_EXECUTE;
      
              return action;
      }
      
      kauth_action_t
      kauth_extattr_action(mode_t access_mode)
      {
              kauth_action_t action = 0;
      
              if (access_mode & VREAD)
                      action |= KAUTH_VNODE_READ_EXTATTRIBUTES;
              if (access_mode & VWRITE)
                      action |= KAUTH_VNODE_WRITE_EXTATTRIBUTES;
      
              return action;
      }
      
      int
      kauth_authorize_vnode(kauth_cred_t cred, kauth_action_t action,
          struct vnode *vp, struct vnode *dvp, int fs_decision)
      {
              int error;
      
              error = kauth_authorize_action_internal(kauth_builtin_scope_vnode, cred,
                  action, vp, dvp, NULL, NULL);
      
              if (error == KAUTH_RESULT_DENY)
                      return (EACCES);
      
              if (error == KAUTH_RESULT_ALLOW)
                      return (0);
      
              /*
               * If the file-system does not support decision-before-action, we can
               * only short-circuit the operation (deny). If we're here, it means no
               * listener denied it, so our only alternative is to supposedly-allow
               * it and let the file-system have the last word.
               */
              if (fs_decision == KAUTH_VNODE_REMOTEFS)
                      return (0);
      
              return (fs_decision);
      }
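
/*
 * Usage sketch (not part of the original source; the function is
 * hypothetical): how a file system access check can feed its own verdict
 * into the vnode scope, in the style of VOP_ACCESS() implementations.
 * fs_error is whatever the file system computed from the permission bits
 * (0 or EACCES), or KAUTH_VNODE_REMOTEFS when it cannot decide up front.
 */
#if 0
static int
example_vnode_access(struct vnode *vp, mode_t mode, kauth_cred_t cred,
    int fs_error)
{
	kauth_action_t action;

	/* mode carries VREAD/VWRITE/VEXEC bits, not file permission bits. */
	action = kauth_mode_to_action(mode);
	return kauth_authorize_vnode(cred, action, vp, NULL, fs_error);
}
#endif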
      
      static int
      kauth_cred_hook(kauth_cred_t cred, kauth_action_t action, void *arg0,
          void *arg1)
      {
              int r;
      
	r = kauth_authorize_action(kauth_builtin_scope_cred, cred, action,
                  arg0, arg1, NULL, NULL);
      
      #ifdef DIAGNOSTIC
              if (!SIMPLEQ_EMPTY(&kauth_builtin_scope_cred->listenq))
                      KASSERT(r == 0);
      #endif /* DIAGNOSTIC */
      
	return (r);
      }
      /*        $NetBSD: kern_ktrace.c,v 1.173 2018/09/03 16:29:35 riastradh Exp $        */
      
      /*-
       * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*
       * Copyright (c) 1989, 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)kern_ktrace.c        8.5 (Berkeley) 5/14/95
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.173 2018/09/03 16:29:35 riastradh Exp $");
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/proc.h>
      #include <sys/file.h>
      #include <sys/kernel.h>
      #include <sys/kthread.h>
      #include <sys/ktrace.h>
      #include <sys/kmem.h>
      #include <sys/syslog.h>
      #include <sys/filedesc.h>
      #include <sys/ioctl.h>
      #include <sys/callout.h>
      #include <sys/kauth.h>
      
      #include <sys/mount.h>
      #include <sys/syscallargs.h>
      
      /*
       * TODO:
       *        - need better error reporting?
       *        - userland utility to sort ktrace.out by timestamp.
       *        - keep minimum information in ktrace_entry when rest of alloc failed.
       *        - per trace control of configurable parameters.
       */
      
      struct ktrace_entry {
              TAILQ_ENTRY(ktrace_entry) kte_list;
              struct        ktr_header kte_kth;
              void        *kte_buf;
              size_t        kte_bufsz;
      #define        KTE_SPACE                32
              uint8_t kte_space[KTE_SPACE] __aligned(sizeof(register_t));
      };
      
      struct ktr_desc {
              TAILQ_ENTRY(ktr_desc) ktd_list;
              int ktd_flags;
      #define        KTDF_WAIT                0x0001
      #define        KTDF_DONE                0x0002
      #define        KTDF_BLOCKING                0x0004
      #define        KTDF_INTERACTIVE        0x0008
              int ktd_error;
      #define        KTDE_ENOMEM                0x0001
      #define        KTDE_ENOSPC                0x0002
              int ktd_errcnt;
	int ktd_ref;			/* # of references */
	int ktd_qcount;			/* # of entries in the queue */
      
              /*
               * Params to control behaviour.
               */
	int ktd_delayqcnt;		/* # of entries allowed to delay */
              int ktd_wakedelay;                /* delay of wakeup in *tick* */
              int ktd_intrwakdl;                /* ditto, but when interactive */
      
              file_t *ktd_fp;                        /* trace output file */
              lwp_t *ktd_lwp;                        /* our kernel thread */
              TAILQ_HEAD(, ktrace_entry) ktd_queue;
              callout_t ktd_wakch;                /* delayed wakeup */
              kcondvar_t ktd_sync_cv;
              kcondvar_t ktd_cv;
      };
      
      static void        ktrwrite(struct ktr_desc *, struct ktrace_entry *);
      static int        ktrops(lwp_t *, struct proc *, int, int,
                          struct ktr_desc *);
      static int        ktrsetchildren(lwp_t *, struct proc *, int, int,
                          struct ktr_desc *);
      static int        ktrcanset(lwp_t *, struct proc *);
      static int        ktrsamefile(file_t *, file_t *);
      static void        ktr_kmem(lwp_t *, int, const void *, size_t);
      static void        ktr_io(lwp_t *, int, enum uio_rw, struct iovec *, size_t);
      
      static struct ktr_desc *
                      ktd_lookup(file_t *);
      static void        ktdrel(struct ktr_desc *);
      static void        ktdref(struct ktr_desc *);
      static void        ktefree(struct ktrace_entry *);
      static void        ktd_logerrl(struct ktr_desc *, int);
      static void        ktrace_thread(void *);
      static int        ktrderefall(struct ktr_desc *, int);
      
      /*
 * Default values.
       */
      #define        KTD_MAXENTRY                1000        /* XXX: tune */
      #define        KTD_TIMEOUT                5        /* XXX: tune */
      #define        KTD_DELAYQCNT                100        /* XXX: tune */
      #define        KTD_WAKEDELAY                5000        /* XXX: tune */
      #define        KTD_INTRWAKDL                100        /* XXX: tune */
      
      /*
       * Patchable variables.
       */
int ktd_maxentry = KTD_MAXENTRY;	/* max # of entries in the queue */
int ktd_timeout = KTD_TIMEOUT;		/* timeout in seconds */
int ktd_delayqcnt = KTD_DELAYQCNT;	/* # of entries allowed to delay */
      int ktd_wakedelay = KTD_WAKEDELAY;        /* delay of wakeup in *ms* */
      int ktd_intrwakdl = KTD_INTRWAKDL;        /* ditto, but when interactive */
      
      kmutex_t ktrace_lock;
      int ktrace_on;
      static TAILQ_HEAD(, ktr_desc) ktdq = TAILQ_HEAD_INITIALIZER(ktdq);
      static pool_cache_t kte_cache;
      
      static kauth_listener_t ktrace_listener;
      
      static void
      ktd_wakeup(struct ktr_desc *ktd)
      {
      
              callout_stop(&ktd->ktd_wakch);
              cv_signal(&ktd->ktd_cv);
      }
      
      static void
      ktd_callout(void *arg)
      {
      
              mutex_enter(&ktrace_lock);
              ktd_wakeup(arg);
              mutex_exit(&ktrace_lock);
      }
      
      static void
      ktd_logerrl(struct ktr_desc *ktd, int error)
      {
      
              ktd->ktd_error |= error;
              ktd->ktd_errcnt++;
      }
      
      #if 0
      static void
      ktd_logerr(struct proc *p, int error)
      {
              struct ktr_desc *ktd;
      
              KASSERT(mutex_owned(&ktrace_lock));
      
              ktd = p->p_tracep;
              if (ktd == NULL)
                      return;
      
              ktd_logerrl(ktd, error);
      }
      #endif
      
      static int
      ktrace_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
          void *arg0, void *arg1, void *arg2, void *arg3)
      {
              struct proc *p;
              int result;
              enum kauth_process_req req;
      
              result = KAUTH_RESULT_DEFER;
              p = arg0;
      
	if (action != KAUTH_PROCESS_KTRACE)
		return result;
      
              req = (enum kauth_process_req)(unsigned long)arg1;
      
              /* Privileged; secmodel should handle these. */
              if (req == KAUTH_REQ_PROCESS_KTRACE_PERSISTENT)
                      return result;
      
              if ((p->p_traceflag & KTRFAC_PERSISTENT) ||
                  (p->p_flag & PK_SUGID))
                      return result;
      
              if (kauth_cred_geteuid(cred) == kauth_cred_getuid(p->p_cred) &&
                  kauth_cred_getuid(cred) == kauth_cred_getsvuid(p->p_cred) &&
                  kauth_cred_getgid(cred) == kauth_cred_getgid(p->p_cred) &&
                  kauth_cred_getgid(cred) == kauth_cred_getsvgid(p->p_cred))
                      result = KAUTH_RESULT_ALLOW;
      
              return result;
      }
      
      /*
       * Initialise the ktrace system.
       */
      void
      ktrinit(void)
      {
      
              mutex_init(&ktrace_lock, MUTEX_DEFAULT, IPL_NONE);
              kte_cache = pool_cache_init(sizeof(struct ktrace_entry), 0, 0, 0,
                  "ktrace", &pool_allocator_nointr, IPL_NONE, NULL, NULL, NULL);
      
              ktrace_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
                  ktrace_listener_cb, NULL);
      }
      
      /*
       * Release a reference.  Called with ktrace_lock held.
       */
      void
      ktdrel(struct ktr_desc *ktd)
      {
      
              KASSERT(mutex_owned(&ktrace_lock));
      
              KDASSERT(ktd->ktd_ref != 0);
              KASSERT(ktd->ktd_ref > 0);
              KASSERT(ktrace_on > 0);
              ktrace_on--;
              if (--ktd->ktd_ref <= 0) {
                      ktd->ktd_flags |= KTDF_DONE;
                      cv_signal(&ktd->ktd_cv);
              }
      }
      
      void
      ktdref(struct ktr_desc *ktd)
      {
      
              KASSERT(mutex_owned(&ktrace_lock));
      
              ktd->ktd_ref++;
              ktrace_on++;
      }
      
      struct ktr_desc *
      ktd_lookup(file_t *fp)
      {
              struct ktr_desc *ktd;
      
              KASSERT(mutex_owned(&ktrace_lock));
      
              for (ktd = TAILQ_FIRST(&ktdq); ktd != NULL;
                  ktd = TAILQ_NEXT(ktd, ktd_list)) {
                      if (ktrsamefile(ktd->ktd_fp, fp)) {
                              ktdref(ktd);
                              break;
                      }
              }
      
              return (ktd);
      }
      
      void
      ktraddentry(lwp_t *l, struct ktrace_entry *kte, int flags)
      {
              struct proc *p = l->l_proc;
              struct ktr_desc *ktd;
      #ifdef DEBUG
              struct timeval t1, t2;
      #endif
      
              mutex_enter(&ktrace_lock);
      
              if (p->p_traceflag & KTRFAC_TRC_EMUL) {
                      /* Add emulation trace before first entry for this process */
                      p->p_traceflag &= ~KTRFAC_TRC_EMUL;
                      mutex_exit(&ktrace_lock);
                      ktrexit(l);
                      ktremul();
                      (void)ktrenter(l);
                      mutex_enter(&ktrace_lock);
              }
      
              /* Tracing may have been cancelled. */
              ktd = p->p_tracep;
              if (ktd == NULL)
                      goto freekte;
      
              /*
               * Bump reference count so that the object will remain while
	 * we are here.  Note that the trace is controlled by another
	 * process.
               */
              ktdref(ktd);
      
              if (ktd->ktd_flags & KTDF_DONE)
                      goto relktd;
      
              if (ktd->ktd_qcount > ktd_maxentry) {
                      ktd_logerrl(ktd, KTDE_ENOSPC);
                      goto relktd;
              }
              TAILQ_INSERT_TAIL(&ktd->ktd_queue, kte, kte_list);
              ktd->ktd_qcount++;
              if (ktd->ktd_flags & KTDF_BLOCKING)
                      goto skip_sync;
      
              if (flags & KTA_WAITOK &&
                  (/* flags & KTA_LARGE */0 || ktd->ktd_flags & KTDF_WAIT ||
                  ktd->ktd_qcount > ktd_maxentry >> 1))
                      /*
		 * Sync with the writer thread, since this is a rather
		 * big request or many requests are pending.
                       */
                      do {
                              ktd->ktd_flags |= KTDF_WAIT;
                              ktd_wakeup(ktd);
      #ifdef DEBUG
                              getmicrouptime(&t1);
      #endif
                              if (cv_timedwait(&ktd->ktd_sync_cv, &ktrace_lock,
                                  ktd_timeout * hz) != 0) {
                                      ktd->ktd_flags |= KTDF_BLOCKING;
                                      /*
                                       * Maybe the writer thread is blocking
                                       * completely for some reason, but
                                       * don't stop target process forever.
                                       */
                                      log(LOG_NOTICE, "ktrace timeout\n");
                                      break;
                              }
      #ifdef DEBUG
                              getmicrouptime(&t2);
                              timersub(&t2, &t1, &t2);
                              if (t2.tv_sec > 0)
                                      log(LOG_NOTICE,
                                          "ktrace long wait: %lld.%06ld\n",
                                          (long long)t2.tv_sec, (long)t2.tv_usec);
      #endif
                      } while (p->p_tracep == ktd &&
                          (ktd->ktd_flags & (KTDF_WAIT | KTDF_DONE)) == KTDF_WAIT);
              else {
                      /* Schedule delayed wakeup */
                      if (ktd->ktd_qcount > ktd->ktd_delayqcnt)
                              ktd_wakeup(ktd);        /* Wakeup now */
                      else if (!callout_pending(&ktd->ktd_wakch))
                              callout_reset(&ktd->ktd_wakch,
                                  ktd->ktd_flags & KTDF_INTERACTIVE ?
                                  ktd->ktd_intrwakdl : ktd->ktd_wakedelay,
                                  ktd_callout, ktd);
              }
      
      skip_sync:
              ktdrel(ktd);
              mutex_exit(&ktrace_lock);
              ktrexit(l);
              return;
      
      relktd:
              ktdrel(ktd);
      
      freekte:
              mutex_exit(&ktrace_lock);
              ktefree(kte);
              ktrexit(l);
      }
      
      void
      ktefree(struct ktrace_entry *kte)
      {
      
              if (kte->kte_buf != kte->kte_space)
                      kmem_free(kte->kte_buf, kte->kte_bufsz);
              pool_cache_put(kte_cache, kte);
      }
      
      /*
       * "deep" compare of two files for the purposes of clearing a trace.
       * Returns true if they're the same open file, or if they point at the
       * same underlying vnode/socket.
       */
      
      int
      ktrsamefile(file_t *f1, file_t *f2)
      {
      
              return ((f1 == f2) ||
                  ((f1 != NULL) && (f2 != NULL) &&
                      (f1->f_type == f2->f_type) &&
                      (f1->f_data == f2->f_data)));
      }
      
      void
      ktrderef(struct proc *p)
      {
              struct ktr_desc *ktd = p->p_tracep;
      
              KASSERT(mutex_owned(&ktrace_lock));
      
              p->p_traceflag = 0;
              if (ktd == NULL)
                      return;
              p->p_tracep = NULL;
      
              cv_broadcast(&ktd->ktd_sync_cv);
              ktdrel(ktd);
      }
      
      void
      ktradref(struct proc *p)
      {
              struct ktr_desc *ktd = p->p_tracep;
      
              KASSERT(mutex_owned(&ktrace_lock));
      
              ktdref(ktd);
      }
      
      int
      ktrderefall(struct ktr_desc *ktd, int auth)
      {
              lwp_t *curl = curlwp;
              struct proc *p;
              int error = 0;
      
              mutex_enter(proc_lock);
              PROCLIST_FOREACH(p, &allproc) {
                      if (p->p_tracep != ktd)
                              continue;
                      mutex_enter(p->p_lock);
                      mutex_enter(&ktrace_lock);
                      if (p->p_tracep == ktd) {
                              if (!auth || ktrcanset(curl, p))
                                      ktrderef(p);
                              else
                                      error = EPERM;
                      }
                      mutex_exit(&ktrace_lock);
                      mutex_exit(p->p_lock);
              }
              mutex_exit(proc_lock);
      
              return error;
      }
      
      int
      ktealloc(struct ktrace_entry **ktep, void **bufp, lwp_t *l, int type,
               size_t sz)
      {
              struct proc *p = l->l_proc;
              struct ktrace_entry *kte;
              struct ktr_header *kth;
              void *buf;
      
              if (ktrenter(l))
                      return EAGAIN;
      
              kte = pool_cache_get(kte_cache, PR_WAITOK);
              if (sz > sizeof(kte->kte_space)) {
                      buf = kmem_alloc(sz, KM_SLEEP);
              } else
                      buf = kte->kte_space;
      
              kte->kte_bufsz = sz;
              kte->kte_buf = buf;
      
              kth = &kte->kte_kth;
              (void)memset(kth, 0, sizeof(*kth));
              kth->ktr_len = sz;
              kth->ktr_type = type;
              kth->ktr_pid = p->p_pid;
              memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
              kth->ktr_version = KTRFAC_VERSION(p->p_traceflag);
              kth->ktr_lid = l->l_lid;
              nanotime(&kth->ktr_ts);
      
              *ktep = kte;
              *bufp = buf;
      
              return 0;
      }
      
      void
      ktesethdrlen(struct ktrace_entry *kte, size_t l)
      {        
              kte->kte_kth.ktr_len = l;
      }
      
      void
      ktr_syscall(register_t code, const register_t args[], int narg)
      {
              lwp_t *l = curlwp;
              struct proc *p = l->l_proc;
              struct ktrace_entry *kte;
              struct ktr_syscall *ktp;
              register_t *argp;
              size_t len;
              u_int i;
      
              if (!KTRPOINT(p, KTR_SYSCALL))
                      return;
      
              len = sizeof(struct ktr_syscall) + narg * sizeof argp[0];
      
              if (ktealloc(&kte, (void *)&ktp, l, KTR_SYSCALL, len))
                      return;
      
              ktp->ktr_code = code;
              ktp->ktr_argsize = narg * sizeof argp[0];
              argp = (register_t *)(ktp + 1);
              for (i = 0; i < narg; i++)
                      *argp++ = args[i];
      
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
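/*
 * Record a system call return: the syscall code, the error, and the
 * return values (zero when an error occurred).
 */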
      void
      ktr_sysret(register_t code, int error, register_t *retval)
      {
              lwp_t *l = curlwp;
              struct ktrace_entry *kte;
              struct ktr_sysret *ktp;
      
              if (!KTRPOINT(l->l_proc, KTR_SYSRET))
                      return;
      
              if (ktealloc(&kte, (void *)&ktp, l, KTR_SYSRET,
                  sizeof(struct ktr_sysret)))
                      return;
      
              ktp->ktr_code = code;
              ktp->ktr_eosys = 0;                        /* XXX unused */
              ktp->ktr_error = error;
              ktp->ktr_retval = retval && error == 0 ? retval[0] : 0;
              ktp->ktr_retval_1 = retval && error == 0 ? retval[1] : 0;
      
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
      void
      ktr_namei(const char *path, size_t pathlen)
      {
              lwp_t *l = curlwp;
      
              if (!KTRPOINT(l->l_proc, KTR_NAMEI))
                      return;
      
              ktr_kmem(l, KTR_NAMEI, path, pathlen);
      }
      
      void
      ktr_namei2(const char *eroot, size_t erootlen,
                const char *path, size_t pathlen)
      {
              lwp_t *l = curlwp;
              struct ktrace_entry *kte;
              void *buf;
      
              if (!KTRPOINT(l->l_proc, KTR_NAMEI))
                      return;
      
              if (ktealloc(&kte, &buf, l, KTR_NAMEI, erootlen + pathlen))
                      return;
              memcpy(buf, eroot, erootlen);
              buf = (char *)buf + erootlen;
              memcpy(buf, path, pathlen);
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
      void
      ktr_emul(void)
      {
              lwp_t *l = curlwp;
              const char *emul = l->l_proc->p_emul->e_name;
      
              if (!KTRPOINT(l->l_proc, KTR_EMUL))
                      return;
      
              ktr_kmem(l, KTR_EMUL, emul, strlen(emul));
      }
      
      void
      ktr_execarg(const void *bf, size_t len)
      {
              lwp_t *l = curlwp;
      
              if (!KTRPOINT(l->l_proc, KTR_EXEC_ARG))
                      return;
      
              ktr_kmem(l, KTR_EXEC_ARG, bf, len);
      }
      
      void
      ktr_execenv(const void *bf, size_t len)
      {
              lwp_t *l = curlwp;
      
              if (!KTRPOINT(l->l_proc, KTR_EXEC_ENV))
                      return;
      
              ktr_kmem(l, KTR_EXEC_ENV, bf, len);
      }
      
      void
      ktr_execfd(int fd, u_int dtype)
      {
              struct ktrace_entry *kte;
              struct ktr_execfd* ktp;
      
              lwp_t *l = curlwp;
      
              if (!KTRPOINT(l->l_proc, KTR_EXEC_FD))
                      return;
      
              if (ktealloc(&kte, (void *)&ktp, l, KTR_EXEC_FD, sizeof(*ktp)))
                      return;
      
              ktp->ktr_fd = fd;
              ktp->ktr_dtype = dtype;
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
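/*
 * Common helper: emit an entry whose payload is copied from a kernel
 * buffer.
 */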
      static void
      ktr_kmem(lwp_t *l, int type, const void *bf, size_t len)
      {
              struct ktrace_entry *kte;
              void *buf;
      
              if (ktealloc(&kte, &buf, l, type, len))
                      return;
              memcpy(buf, bf, len);
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
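/*
 * Record generic I/O.  The user data described by "iov" is copied in
 * chunks so that each entry (ktr_genio header included) stays within
 * PAGE_SIZE; between chunks we yield the CPU if the scheduler has
 * requested it.
 */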
      static void
      ktr_io(lwp_t *l, int fd, enum uio_rw rw, struct iovec *iov, size_t len)
      {
              struct ktrace_entry *kte;
              struct ktr_genio *ktp;
              size_t resid = len, cnt, buflen;
              char *cp;
      
       next:
              buflen = uimin(PAGE_SIZE, resid + sizeof(struct ktr_genio));
      
              if (ktealloc(&kte, (void *)&ktp, l, KTR_GENIO, buflen))
                      return;
      
              ktp->ktr_fd = fd;
              ktp->ktr_rw = rw;
      
              cp = (void *)(ktp + 1);
              buflen -= sizeof(struct ktr_genio);
              kte->kte_kth.ktr_len = sizeof(struct ktr_genio);
      
              while (buflen > 0) {
                      cnt = uimin(iov->iov_len, buflen);
                      if (copyin(iov->iov_base, cp, cnt) != 0)
                              goto out;
                      kte->kte_kth.ktr_len += cnt;
                      cp += cnt;
                      buflen -= cnt;
                      resid -= cnt;
                      iov->iov_len -= cnt;
                      if (iov->iov_len == 0)
                              iov++;
                      else
                              iov->iov_base = (char *)iov->iov_base + cnt;
              }
      
	/*
	 * Don't push too many entries at once.  It would cause kmem map
	 * shortage.
	 */
              ktraddentry(l, kte, KTA_WAITOK | KTA_LARGE);
              if (resid > 0) {
                      if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) {
                              (void)ktrenter(l);
                              preempt();
                              ktrexit(l);
                      }
      
                      goto next;
              }
      
              return;
      
      out:
              ktefree(kte);
              ktrexit(l);
      }
      
      void
      ktr_genio(int fd, enum uio_rw rw, const void *addr, size_t len, int error)
      {
              lwp_t *l = curlwp;
              struct iovec iov;
      
              if (!KTRPOINT(l->l_proc, KTR_GENIO) || error != 0)
                      return;
              iov.iov_base = __UNCONST(addr);
              iov.iov_len = len;
              ktr_io(l, fd, rw, &iov, len);
      }
      
      void
      ktr_geniov(int fd, enum uio_rw rw, struct iovec *iov, size_t len, int error)
      {
              lwp_t *l = curlwp;
      
              if (!KTRPOINT(l->l_proc, KTR_GENIO) || error != 0)
                      return;
              ktr_io(l, fd, rw, iov, len);
      }
      
      void
      ktr_mibio(int fd, enum uio_rw rw, const void *addr, size_t len, int error)
      {
              lwp_t *l = curlwp;
              struct iovec iov;
      
              if (!KTRPOINT(l->l_proc, KTR_MIB) || error != 0)
                      return;
              iov.iov_base = __UNCONST(addr);
              iov.iov_len = len;
              ktr_io(l, fd, rw, &iov, len);
      }
      
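/*
 * Record signal delivery: signal number, handler, blocked mask and,
 * when available, the accompanying siginfo.
 */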
      void
      ktr_psig(int sig, sig_t action, const sigset_t *mask,
               const ksiginfo_t *ksi)
      {
              struct ktrace_entry *kte;
              lwp_t *l = curlwp;
              struct {
                      struct ktr_psig        kp;
                      siginfo_t        si;
              } *kbuf;
      
              if (!KTRPOINT(l->l_proc, KTR_PSIG))
                      return;
      
              if (ktealloc(&kte, (void *)&kbuf, l, KTR_PSIG, sizeof(*kbuf)))
                      return;
      
              kbuf->kp.signo = (char)sig;
              kbuf->kp.action = action;
              kbuf->kp.mask = *mask;
      
              if (ksi) {
                      kbuf->kp.code = KSI_TRAPCODE(ksi);
                      (void)memset(&kbuf->si, 0, sizeof(kbuf->si));
                      kbuf->si._info = ksi->ksi_info;
                      kte->kte_kth.ktr_len = sizeof(*kbuf);
              } else {
                      kbuf->kp.code = 0;
                      kte->kte_kth.ktr_len = sizeof(struct ktr_psig);
              }
      
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
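/*
 * Record a context switch.  On the way out we only stash the timestamp
 * and flags (we cannot safely queue a record at that point); the
 * deferred "out" record and the "in" record are emitted on the way
 * back in.
 */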
      void
      ktr_csw(int out, int user)
      {
              lwp_t *l = curlwp;
              struct proc *p = l->l_proc;
              struct ktrace_entry *kte;
              struct ktr_csw *kc;
      
              if (!KTRPOINT(p, KTR_CSW))
                      return;
      
              /*
               * Don't record context switches resulting from blocking on 
               * locks; it's too easy to get duff results.
               */
              if (l->l_syncobj == &mutex_syncobj || l->l_syncobj == &rw_syncobj)
                      return;
      
              /*
	 * We can't sleep if we're already going to sleep (if the
	 * original condition is met during the sleep, we would hang).
	 *
	 * XXX This is not ideal: it would be better to maintain a pool
	 * of ktes and push them to the kthread when the context switch
	 * actually happens; however, given the points we are called
	 * from, that is difficult to do.
               */
              if (out) {
                      if (ktrenter(l))
                              return;
      
                      nanotime(&l->l_ktrcsw);
                      l->l_pflag |= LP_KTRCSW;
                      if (user)
                              l->l_pflag |= LP_KTRCSWUSER;
                      else
                              l->l_pflag &= ~LP_KTRCSWUSER;
      
                      ktrexit(l);
                      return;
              }
      
              /*
               * On the way back in, we need to record twice: once for entry, and
               * once for exit.
               */
              if ((l->l_pflag & LP_KTRCSW) != 0) {
                      struct timespec *ts;
                      l->l_pflag &= ~LP_KTRCSW;
      
                      if (ktealloc(&kte, (void *)&kc, l, KTR_CSW, sizeof(*kc)))
                              return;
      
                      kc->out = 1;
                      kc->user = ((l->l_pflag & LP_KTRCSWUSER) != 0);
      
                      ts = &l->l_ktrcsw;
                      switch (KTRFAC_VERSION(p->p_traceflag)) {
                      case 0:
                              kte->kte_kth.ktr_otv.tv_sec = ts->tv_sec;
                              kte->kte_kth.ktr_otv.tv_usec = ts->tv_nsec / 1000;
                              break;
                      case 1: 
                              kte->kte_kth.ktr_ots.tv_sec = ts->tv_sec;
                              kte->kte_kth.ktr_ots.tv_nsec = ts->tv_nsec;
                              break;
                      case 2:
                              kte->kte_kth.ktr_ts.tv_sec = ts->tv_sec;
                              kte->kte_kth.ktr_ts.tv_nsec = ts->tv_nsec;
                              break;
                      default:
                              break;
                      }
      
                      ktraddentry(l, kte, KTA_WAITOK);
              }
      
              if (ktealloc(&kte, (void *)&kc, l, KTR_CSW, sizeof(*kc)))
                      return;
      
              kc->out = 0;
              kc->user = user;
      
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
      bool
      ktr_point(int fac_bit)
      {
              return curlwp->l_proc->p_traceflag & fac_bit;
      }
      
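/*
 * Record a user-supplied (utrace) entry: an identifier string plus up
 * to KTR_USER_MAXLEN bytes of data copied in from userspace.
 */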
      int
      ktruser(const char *id, void *addr, size_t len, int ustr)
      {
              struct ktrace_entry *kte;
              struct ktr_user *ktp;
              lwp_t *l = curlwp;
              void *user_dta;
              int error;
      
              if (!KTRPOINT(l->l_proc, KTR_USER))
                      return 0;
      
              if (len > KTR_USER_MAXLEN)
                      return ENOSPC;
      
              error = ktealloc(&kte, (void *)&ktp, l, KTR_USER, sizeof(*ktp) + len);
              if (error != 0)
                      return error;
      
              if (ustr) {
                      if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0)
                              ktp->ktr_id[0] = '\0';
              } else
                      strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
              ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0';
      
              user_dta = (void *)(ktp + 1);
              if ((error = copyin(addr, user_dta, len)) != 0)
                      kte->kte_kth.ktr_len = 0;
      
              ktraddentry(l, kte, KTA_WAITOK);
              return error;
      }
      
      void
      ktr_kuser(const char *id, const void *addr, size_t len)
      {
              struct ktrace_entry *kte;
              struct ktr_user *ktp;
              lwp_t *l = curlwp;
              int error;
      
              if (!KTRPOINT(l->l_proc, KTR_USER))
                      return;
      
              if (len > KTR_USER_MAXLEN)
                      return;
      
              error = ktealloc(&kte, (void *)&ktp, l, KTR_USER, sizeof(*ktp) + len);
              if (error != 0)
                      return;
      
              strlcpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
      
              memcpy(ktp + 1, addr, len);
      
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
      void
      ktr_mib(const int *name, u_int namelen)
      {
              struct ktrace_entry *kte;
              int *namep;
              size_t size;
              lwp_t *l = curlwp;
      
              if (!KTRPOINT(l->l_proc, KTR_MIB))
                      return;
      
              size = namelen * sizeof(*name);
      
              if (ktealloc(&kte, (void *)&namep, l, KTR_MIB, size))
                      return;
      
              (void)memcpy(namep, name, namelen * sizeof(*name));
      
              ktraddentry(l, kte, KTA_WAITOK);
      }
      
      /* Interface and common routines */
      
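/*
 * Common code for the ktrace and fktrace system calls: look up or
 * create the trace descriptor for the given file, then apply the
 * requested operation to the target process(es).
 */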
      int
      ktrace_common(lwp_t *curl, int ops, int facs, int pid, file_t **fpp)
      {
              struct proc *p;
              struct pgrp *pg;
              struct ktr_desc *ktd = NULL, *nktd;
              file_t *fp = *fpp;
              int ret = 0;
              int error = 0;
              int descend;
      
              descend = ops & KTRFLAG_DESCEND;
              facs = facs & ~((unsigned) KTRFAC_PERSISTENT);
      
              (void)ktrenter(curl);
      
              switch (KTROP(ops)) {
      
              case KTROP_CLEARFILE:
                      /*
                       * Clear all uses of the tracefile
                       */
                      mutex_enter(&ktrace_lock);
                      ktd = ktd_lookup(fp);
                      mutex_exit(&ktrace_lock);
                      if (ktd == NULL)
                              goto done;
                      error = ktrderefall(ktd, 1);
                      goto done;
      
              case KTROP_SET:
                      mutex_enter(&ktrace_lock);
                      ktd = ktd_lookup(fp);
                      mutex_exit(&ktrace_lock);
                      if (ktd == NULL) {
                              nktd = kmem_alloc(sizeof(*nktd), KM_SLEEP);
                              TAILQ_INIT(&nktd->ktd_queue);
                              callout_init(&nktd->ktd_wakch, CALLOUT_MPSAFE);
                              cv_init(&nktd->ktd_cv, "ktrwait");
                              cv_init(&nktd->ktd_sync_cv, "ktrsync");
                              nktd->ktd_flags = 0;
                              nktd->ktd_qcount = 0;
                              nktd->ktd_error = 0;
                              nktd->ktd_errcnt = 0;
                              nktd->ktd_delayqcnt = ktd_delayqcnt;
                              nktd->ktd_wakedelay = mstohz(ktd_wakedelay);
                              nktd->ktd_intrwakdl = mstohz(ktd_intrwakdl);
                              nktd->ktd_ref = 0;
                              nktd->ktd_fp = fp;
                              mutex_enter(&ktrace_lock);
                              ktdref(nktd);
                              mutex_exit(&ktrace_lock);
      
                              /*
			 * XXX: not correct.  Needs a way to detect
			 * whether this is ktruss or ktrace.
                               */
                              if (fp->f_type == DTYPE_PIPE)
                                      nktd->ktd_flags |= KTDF_INTERACTIVE;
      
                              mutex_enter(&fp->f_lock);
                              fp->f_count++;
                              mutex_exit(&fp->f_lock);
                              error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
                                  ktrace_thread, nktd, &nktd->ktd_lwp, "ktrace");
                              if (error != 0) {
                                      kmem_free(nktd, sizeof(*nktd));
                                      nktd = NULL;
                                      mutex_enter(&fp->f_lock);
                                      fp->f_count--;
                                      mutex_exit(&fp->f_lock);
                                      goto done;
                              }
      
                              mutex_enter(&ktrace_lock);
                              ktd = ktd_lookup(fp);
                              if (ktd != NULL) {
                                      ktdrel(nktd);
                                      nktd = NULL;
                              } else {
                                      TAILQ_INSERT_TAIL(&ktdq, nktd, ktd_list);
                                      ktd = nktd;
                              }
                              mutex_exit(&ktrace_lock);
                      }
                      break;
      
              case KTROP_CLEAR:
                      break;
              }
      
              /*
               * need something to (un)trace (XXX - why is this here?)
               */
              if (!facs) {
                      error = EINVAL;
                      *fpp = NULL;
                      goto done;
              }
      
              /*
               * do it
               */
              mutex_enter(proc_lock);
              if (pid < 0) {
                      /*
                       * by process group
                       */
                      pg = pgrp_find(-pid);
                      if (pg == NULL)
                              error = ESRCH;
                      else {
                              LIST_FOREACH(p, &pg->pg_members, p_pglist) {
                                      if (descend)
                                              ret |= ktrsetchildren(curl, p, ops,
                                                  facs, ktd);
                                      else
                                              ret |= ktrops(curl, p, ops, facs,
                                                  ktd);
                              }
                      }
      
              } else {
                      /*
                       * by pid
                       */
                      p = proc_find(pid);
                      if (p == NULL)
                              error = ESRCH;
                      else if (descend)
                              ret |= ktrsetchildren(curl, p, ops, facs, ktd);
                      else
                              ret |= ktrops(curl, p, ops, facs, ktd);
              }
              mutex_exit(proc_lock);
              if (error == 0 && !ret)
                      error = EPERM;
              *fpp = NULL;
      done:
              if (ktd != NULL) {
                      mutex_enter(&ktrace_lock);
                      if (error != 0) {
                              /*
			 * Wake up the thread so that it can die if we
			 * can't trace any process.
                               */
                              ktd_wakeup(ktd);
                      }
                      if (KTROP(ops) == KTROP_SET || KTROP(ops) == KTROP_CLEARFILE)
                              ktdrel(ktd);
                      mutex_exit(&ktrace_lock);
              }
              ktrexit(curl);
              return (error);
      }
      
      /*
       * fktrace system call
       */
      /* ARGSUSED */
      int
      sys_fktrace(struct lwp *l, const struct sys_fktrace_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(int) fd;
                      syscallarg(int) ops;
                      syscallarg(int) facs;
                      syscallarg(int) pid;
              } */
              file_t *fp;
              int error, fd;
      
              fd = SCARG(uap, fd);
              if ((fp = fd_getfile(fd)) == NULL)
                      return (EBADF);
              if ((fp->f_flag & FWRITE) == 0)
                      error = EBADF;
              else
                      error = ktrace_common(l, SCARG(uap, ops),
                          SCARG(uap, facs), SCARG(uap, pid), &fp);
              fd_putfile(fd);
              return error;
      }
      
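/*
 * Apply a ktrace operation (set or clear facilities) to a single
 * process, attaching it to or detaching it from the trace descriptor
 * as needed.
 */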
      int
      ktrops(lwp_t *curl, struct proc *p, int ops, int facs,
          struct ktr_desc *ktd)
      {
              int vers = ops & KTRFAC_VER_MASK;
              int error = 0;
      
              mutex_enter(p->p_lock);
              mutex_enter(&ktrace_lock);
      
              if (!ktrcanset(curl, p))
                      goto out;
      
              switch (vers) {
              case KTRFACv0:
              case KTRFACv1:
              case KTRFACv2:
                      break;
              default:
                      error = EINVAL;
                      goto out;
              }
      
              if (KTROP(ops) == KTROP_SET) {
                      if (p->p_tracep != ktd) {
                              /*
                               * if trace file already in use, relinquish
                               */
                              ktrderef(p);
                              p->p_tracep = ktd;
                              ktradref(p);
                      }
                      p->p_traceflag |= facs;
                      if (kauth_authorize_process(curl->l_cred, KAUTH_PROCESS_KTRACE,
                          p, KAUTH_ARG(KAUTH_REQ_PROCESS_KTRACE_PERSISTENT), NULL,
                          NULL) == 0)
                              p->p_traceflag |= KTRFAC_PERSISTENT;
              } else {
                      /* KTROP_CLEAR */
                      if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
                              /* no more tracing */
                              ktrderef(p);
                      }
              }
      
              if (p->p_traceflag)
                      p->p_traceflag |= vers;
              /*
	 * Emit an emulation record every time there is a ktrace
               * change/attach request.
               */
              if (KTRPOINT(p, KTR_EMUL))
                      p->p_traceflag |= KTRFAC_TRC_EMUL;
      
              p->p_trace_enabled = trace_is_enabled(p);
      #ifdef __HAVE_SYSCALL_INTERN
              (*p->p_emul->e_syscall_intern)(p);
      #endif
      
       out:
               mutex_exit(&ktrace_lock);
               mutex_exit(p->p_lock);
      
              return error ? 0 : 1;
      }
      
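/*
 * Apply a ktrace operation to "top" and all of its descendants,
 * walking the process tree iteratively.  Called with proc_lock held.
 */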
      int
      ktrsetchildren(lwp_t *curl, struct proc *top, int ops, int facs,
          struct ktr_desc *ktd)
      {
              struct proc *p;
              int ret = 0;
      
              KASSERT(mutex_owned(proc_lock));
      
              p = top;
              for (;;) {
                      ret |= ktrops(curl, p, ops, facs, ktd);
                      /*
                       * If this process has children, descend to them next,
                       * otherwise do any siblings, and if done with this level,
                       * follow back up the tree (but not past top).
                       */
                      if (LIST_FIRST(&p->p_children) != NULL) {
                              p = LIST_FIRST(&p->p_children);
                              continue;
                      }
                      for (;;) {
                              if (p == top)
                                      return (ret);
                              if (LIST_NEXT(p, p_sibling) != NULL) {
                                      p = LIST_NEXT(p, p_sibling);
                                      break;
                              }
                              p = p->p_pptr;
                      }
              }
              /*NOTREACHED*/
      }
      
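/*
 * Write a chain of ktrace entries to the trace file, converting each
 * header to the trace format version it was recorded with and
 * gathering multiple entries into a single write request.  On a write
 * error other than a transient EWOULDBLOCK, all processes tracing to
 * this descriptor are detached.
 */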
      void
      ktrwrite(struct ktr_desc *ktd, struct ktrace_entry *kte)
      {
              size_t hlen;
              struct uio auio;
              struct iovec aiov[64], *iov;
              struct ktrace_entry *top = kte;
              struct ktr_header *kth;
              file_t *fp = ktd->ktd_fp;
              int error;
      next:
              auio.uio_iov = iov = &aiov[0];
              auio.uio_offset = 0;
              auio.uio_rw = UIO_WRITE;
              auio.uio_resid = 0;
              auio.uio_iovcnt = 0;
              UIO_SETUP_SYSSPACE(&auio);
              do {
                      struct timespec ts;
                      lwpid_t lid;
                      kth = &kte->kte_kth;
      
                      hlen = sizeof(struct ktr_header);
                      switch (kth->ktr_version) {
                      case 0:
                              ts = kth->ktr_time;
      
                              kth->ktr_otv.tv_sec = ts.tv_sec;
                              kth->ktr_otv.tv_usec = ts.tv_nsec / 1000;
                              kth->ktr_unused = NULL;
                              hlen -= sizeof(kth->_v) -
                                  MAX(sizeof(kth->_v._v0), sizeof(kth->_v._v1));
                              break;
                      case 1:
                              ts = kth->ktr_time;
                              lid = kth->ktr_lid;
      
                              kth->ktr_ots.tv_sec = ts.tv_sec;
                              kth->ktr_ots.tv_nsec = ts.tv_nsec;
                              kth->ktr_olid = lid;
                              hlen -= sizeof(kth->_v) -
                                  MAX(sizeof(kth->_v._v0), sizeof(kth->_v._v1));
                              break;
                      }
                      iov->iov_base = (void *)kth;
                      iov++->iov_len = hlen;
                      auio.uio_resid += hlen;
                      auio.uio_iovcnt++;
                      if (kth->ktr_len > 0) {
                              iov->iov_base = kte->kte_buf;
                              iov++->iov_len = kth->ktr_len;
                              auio.uio_resid += kth->ktr_len;
                              auio.uio_iovcnt++;
                      }
              } while ((kte = TAILQ_NEXT(kte, kte_list)) != NULL &&
                  auio.uio_iovcnt < sizeof(aiov) / sizeof(aiov[0]) - 1);
      
      again:
              error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
                  fp->f_cred, FOF_UPDATE_OFFSET);
              switch (error) {
      
              case 0:
                      if (auio.uio_resid > 0)
                              goto again;
                      if (kte != NULL)
                              goto next;
                      break;
      
              case EWOULDBLOCK:
                      kpause("ktrzzz", false, 1, NULL);
                      goto again;
      
              default:
                      /*
		 * If an error was encountered, give up tracing on this
                       * vnode.  Don't report EPIPE as this can easily
                       * happen with fktrace()/ktruss.
                       */
      #ifndef DEBUG
                      if (error != EPIPE)
      #endif
                              log(LOG_NOTICE,
                                  "ktrace write failed, errno %d, tracing stopped\n",
                                  error);
                      (void)ktrderefall(ktd, 0);
              }
      
              while ((kte = top) != NULL) {
                      top = TAILQ_NEXT(top, kte_list);
                      ktefree(kte);
              }
      }
      
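/*
 * Per-descriptor worker thread: drains the queue of pending entries
 * and writes them out, then tears the descriptor down once the last
 * reference has gone away.
 */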
      void
      ktrace_thread(void *arg)
      {
              struct ktr_desc *ktd = arg;
              file_t *fp = ktd->ktd_fp;
              struct ktrace_entry *kte;
              int ktrerr, errcnt;
      
              mutex_enter(&ktrace_lock);
              for (;;) {
                      kte = TAILQ_FIRST(&ktd->ktd_queue);
                      if (kte == NULL) {
                              if (ktd->ktd_flags & KTDF_WAIT) {
                                      ktd->ktd_flags &= ~(KTDF_WAIT | KTDF_BLOCKING);
                                      cv_broadcast(&ktd->ktd_sync_cv);
                              }
                              if (ktd->ktd_ref == 0)
                                      break;
                              cv_wait(&ktd->ktd_cv, &ktrace_lock);
                              continue;
                      }
                      TAILQ_INIT(&ktd->ktd_queue);
                      ktd->ktd_qcount = 0;
                      ktrerr = ktd->ktd_error;
                      errcnt = ktd->ktd_errcnt;
                      ktd->ktd_error = ktd->ktd_errcnt = 0;
                      mutex_exit(&ktrace_lock);
      
                      if (ktrerr) {
                              log(LOG_NOTICE,
                                  "ktrace failed, fp %p, error 0x%x, total %d\n",
                                  fp, ktrerr, errcnt);
                      }
                      ktrwrite(ktd, kte);
                      mutex_enter(&ktrace_lock);
              }
      
              TAILQ_REMOVE(&ktdq, ktd, ktd_list);
      
              callout_halt(&ktd->ktd_wakch, &ktrace_lock);
              callout_destroy(&ktd->ktd_wakch);
              mutex_exit(&ktrace_lock);
      
              /*
	 * The ktrace file descriptor can't be watched (it is not visible
	 * to userspace), so no kqueue handling is needed here.
               * XXX: The above comment is wrong, because the fktrace file
               * descriptor is available in userland.
               */
              closef(fp);
      
              cv_destroy(&ktd->ktd_sync_cv);
              cv_destroy(&ktd->ktd_cv);
      
              kmem_free(ktd, sizeof(*ktd));
      
              kthread_exit(0);
      }
      
      /*
       * Return true if caller has permission to set the ktracing state
       * of target.  Essentially, the target can't possess any
       * more permissions than the caller.  KTRFAC_PERSISTENT signifies that
       * the tracing will persist on sugid processes during exec; it is only
       * settable by a process with appropriate credentials.
       *
       * TODO: check groups.  use caller effective gid.
       */
      int
      ktrcanset(lwp_t *calll, struct proc *targetp)
      {
              KASSERT(mutex_owned(targetp->p_lock));
              KASSERT(mutex_owned(&ktrace_lock));
      
              if (kauth_authorize_process(calll->l_cred, KAUTH_PROCESS_KTRACE,
                  targetp, NULL, NULL, NULL) == 0)
                      return (1);
      
              return (0);
      }
      
      /*
       * Put user defined entry to ktrace records.
       */
      int
      sys_utrace(struct lwp *l, const struct sys_utrace_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(const char *) label;
                      syscallarg(void *) addr;
                      syscallarg(size_t) len;
              } */
      
              return ktruser(SCARG(uap, label), SCARG(uap, addr),
                  SCARG(uap, len), 1);
      }
      /*        $NetBSD: kern_synch.c,v 1.323 2019/02/03 03:19:28 mrg Exp $        */
      
      /*-
       * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009
       *    The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
       * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
       * Daniel Sieger.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*-
       * Copyright (c) 1982, 1986, 1990, 1991, 1993
       *        The Regents of the University of California.  All rights reserved.
       * (c) UNIX System Laboratories, Inc.
       * All or some portions of this file are derived from material licensed
       * to the University of California by American Telephone and Telegraph
       * Co. or Unix System Laboratories, Inc. and are reproduced herein with
       * the permission of UNIX System Laboratories, Inc.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)kern_synch.c        8.9 (Berkeley) 5/19/95
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.323 2019/02/03 03:19:28 mrg Exp $");
      
      #include "opt_kstack.h"
      #include "opt_dtrace.h"
      
      #define        __MUTEX_PRIVATE
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/proc.h>
      #include <sys/kernel.h>
      #include <sys/cpu.h>
      #include <sys/pserialize.h>
      #include <sys/resourcevar.h>
      #include <sys/sched.h>
      #include <sys/syscall_stats.h>
      #include <sys/sleepq.h>
      #include <sys/lockdebug.h>
      #include <sys/evcnt.h>
      #include <sys/intr.h>
      #include <sys/lwpctl.h>
      #include <sys/atomic.h>
      #include <sys/syslog.h>
      
      #include <uvm/uvm_extern.h>
      
      #include <dev/lockstat.h>
      
      #include <sys/dtrace_bsd.h>
      int                             dtrace_vtime_active=0;
      dtrace_vtime_switch_func_t      dtrace_vtime_switch_func;
      
      static void        sched_unsleep(struct lwp *, bool);
      static void        sched_changepri(struct lwp *, pri_t);
      static void        sched_lendpri(struct lwp *, pri_t);
      static void        resched_cpu(struct lwp *);
      
      syncobj_t sleep_syncobj = {
              .sobj_flag        = SOBJ_SLEEPQ_SORTED,
              .sobj_unsleep        = sleepq_unsleep,
              .sobj_changepri        = sleepq_changepri,
              .sobj_lendpri        = sleepq_lendpri,
              .sobj_owner        = syncobj_noowner,
      };
      
      syncobj_t sched_syncobj = {
              .sobj_flag        = SOBJ_SLEEPQ_SORTED,
              .sobj_unsleep        = sched_unsleep,
              .sobj_changepri        = sched_changepri,
              .sobj_lendpri        = sched_lendpri,
              .sobj_owner        = syncobj_noowner,
      };
      
      /* "Lightning bolt": once a second sleep address. */
      kcondvar_t                lbolt                        __cacheline_aligned;
      
      u_int                        sched_pstats_ticks        __cacheline_aligned;
      
      /* Preemption event counters. */
      static struct evcnt        kpreempt_ev_crit        __cacheline_aligned;
      static struct evcnt        kpreempt_ev_klock        __cacheline_aligned;
      static struct evcnt        kpreempt_ev_immed        __cacheline_aligned;
      
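/*
 * Initialize the synchronization subsystem: the "lbolt" once-a-second
 * sleep channel and the kernel preemption event counters.
 */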
      void
      synch_init(void)
      {
      
              cv_init(&lbolt, "lbolt");
      
              evcnt_attach_dynamic(&kpreempt_ev_crit, EVCNT_TYPE_MISC, NULL,
                 "kpreempt", "defer: critical section");
              evcnt_attach_dynamic(&kpreempt_ev_klock, EVCNT_TYPE_MISC, NULL,
                 "kpreempt", "defer: kernel_lock");
              evcnt_attach_dynamic(&kpreempt_ev_immed, EVCNT_TYPE_MISC, NULL,
                 "kpreempt", "immediate");
      }
      
      /*
       * OBSOLETE INTERFACE
       *
       * General sleep call.  Suspends the current LWP until a wakeup is
       * performed on the specified identifier.  The LWP will then be made
       * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
 * means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping; otherwise signals are not checked.
 * Returns 0 if awakened and EWOULDBLOCK if the timeout expires.  If
 * PCATCH is set and a signal needs to be delivered, ERESTART is
 * returned if the current system call should be restarted if possible,
 * and EINTR is returned if the system call should be interrupted by
 * the signal.
       */
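/*
 * A minimal usage sketch (the wait channel, wmesg and timeout below
 * are illustrative only): sleep on "ident" for at most one second,
 * letting signals interrupt the sleep:
 *
 *	error = tsleep(ident, PCATCH, "example", hz);
 */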
      int
      tsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo)
      {
              struct lwp *l = curlwp;
              sleepq_t *sq;
              kmutex_t *mp;
      
              KASSERT((l->l_pflag & LP_INTR) == 0);
              KASSERT(ident != &lbolt);
      
              if (sleepq_dontsleep(l)) {
                      (void)sleepq_abort(NULL, 0);
                      return 0;
              }
      
              l->l_kpriority = true;
              sq = sleeptab_lookup(&sleeptab, ident, &mp);
              sleepq_enter(sq, l, mp);
              sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj);
              return sleepq_block(timo, priority & PCATCH);
      }
      
      int
      mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
              kmutex_t *mtx)
      {
              struct lwp *l = curlwp;
              sleepq_t *sq;
              kmutex_t *mp;
              int error;
      
              KASSERT((l->l_pflag & LP_INTR) == 0);
              KASSERT(ident != &lbolt);
      
              if (sleepq_dontsleep(l)) {
                      (void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
                      return 0;
              }
      
              l->l_kpriority = true;
              sq = sleeptab_lookup(&sleeptab, ident, &mp);
              sleepq_enter(sq, l, mp);
              sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj);
              mutex_exit(mtx);
              error = sleepq_block(timo, priority & PCATCH);
      
              if ((priority & PNORELOCK) == 0)
                      mutex_enter(mtx);
      
              return error;
      }
      
      /*
       * General sleep call for situations where a wake-up is not expected.
       */
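/*
 * For example (a sketch only), an uninterruptible pause of roughly
 * 10 ms with no interlock:
 *
 *	(void)kpause("pause", false, mstohz(10), NULL);
 */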
      int
      kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
      {
              struct lwp *l = curlwp;
              kmutex_t *mp;
              sleepq_t *sq;
              int error;
      
              KASSERT(!(timo == 0 && intr == false));
      
              if (sleepq_dontsleep(l))
                      return sleepq_abort(NULL, 0);
      
              if (mtx != NULL)
                      mutex_exit(mtx);
              l->l_kpriority = true;
              sq = sleeptab_lookup(&sleeptab, l, &mp);
              sleepq_enter(sq, l, mp);
              sleepq_enqueue(sq, l, wmesg, &sleep_syncobj);
              error = sleepq_block(timo, intr);
              if (mtx != NULL)
                      mutex_enter(mtx);
      
              return error;
      }
      
      /*
       * OBSOLETE INTERFACE
       *
       * Make all LWPs sleeping on the specified identifier runnable.
       */
      void
      wakeup(wchan_t ident)
      {
              sleepq_t *sq;
              kmutex_t *mp;
      
              if (__predict_false(cold))
                      return;
      
              sq = sleeptab_lookup(&sleeptab, ident, &mp);
              sleepq_wake(sq, ident, (u_int)-1, mp);
      }
      
      /*
       * General yield call.  Puts the current LWP back on its run queue and
       * performs a voluntary context switch.  Should only be called when the
       * current LWP explicitly requests it (eg sched_yield(2)).
       */
      void
      yield(void)
      {
              struct lwp *l = curlwp;
      
              KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
              lwp_lock(l);
              KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
              KASSERT(l->l_stat == LSONPROC);
              l->l_kpriority = false;
              (void)mi_switch(l);
              KERNEL_LOCK(l->l_biglocks, l);
      }
      
      /*
       * General preemption call.  Puts the current LWP back on its run queue
       * and performs an involuntary context switch.
       */
      void
      preempt(void)
      {
              struct lwp *l = curlwp;
      
              KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
              lwp_lock(l);
              KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
              KASSERT(l->l_stat == LSONPROC);
              l->l_kpriority = false;
              l->l_pflag |= LP_PREEMPTING;
              (void)mi_switch(l);
              KERNEL_LOCK(l->l_biglocks, l);
      }
      
      /*
       * Handle a request made by another agent to preempt the current LWP
       * in-kernel.  Usually called when l_dopreempt may be non-zero.
       *
       * Character addresses for lockstat only.
       */
      static char        in_critical_section;
      static char        kernel_lock_held;
      static char        is_softint;
      static char        cpu_kpreempt_enter_fail;
      
      bool
      kpreempt(uintptr_t where)
      {
              uintptr_t failed;
              lwp_t *l;
              int s, dop, lsflag;
      
              l = curlwp;
              failed = 0;
              while ((dop = l->l_dopreempt) != 0) {
                      if (l->l_stat != LSONPROC) {
                              /*
                               * About to block (or die), let it happen.
                               * Doesn't really count as "preemption has
                               * been blocked", since we're going to
                               * context switch.
                               */
                              l->l_dopreempt = 0;
                              return true;
                      }
                      if (__predict_false((l->l_flag & LW_IDLE) != 0)) {
                              /* Can't preempt idle loop, don't count as failure. */
                              l->l_dopreempt = 0;
                              return true;
                      }
                      if (__predict_false(l->l_nopreempt != 0)) {
                              /* LWP holds preemption disabled, explicitly. */
                              if ((dop & DOPREEMPT_COUNTED) == 0) {
                                      kpreempt_ev_crit.ev_count++;
                              }
                              failed = (uintptr_t)&in_critical_section;
                              break;
                      }
                      if (__predict_false((l->l_pflag & LP_INTR) != 0)) {
                              /* Can't preempt soft interrupts yet. */
                              l->l_dopreempt = 0;
                              failed = (uintptr_t)&is_softint;
                              break;
                      }
                      s = splsched();
                      if (__predict_false(l->l_blcnt != 0 ||
                          curcpu()->ci_biglock_wanted != NULL)) {
                              /* Hold or want kernel_lock, code is not MT safe. */
                              splx(s);
                              if ((dop & DOPREEMPT_COUNTED) == 0) {
                                      kpreempt_ev_klock.ev_count++;
                              }
                              failed = (uintptr_t)&kernel_lock_held;
                              break;
                      }
                      if (__predict_false(!cpu_kpreempt_enter(where, s))) {
                              /*
                               * It may be that the IPL is too high.
			 * cpu_kpreempt_enter() can schedule an
                               * interrupt to retry later.
                               */
                              splx(s);
                              failed = (uintptr_t)&cpu_kpreempt_enter_fail;
                              break;
                      }
                      /* Do it! */
                      if (__predict_true((dop & DOPREEMPT_COUNTED) == 0)) {
                              kpreempt_ev_immed.ev_count++;
                      }
                      lwp_lock(l);
                      mi_switch(l);
                      l->l_nopreempt++;
                      splx(s);
      
                      /* Take care of any MD cleanup. */
                      cpu_kpreempt_exit(where);
                      l->l_nopreempt--;
              }
      
              if (__predict_true(!failed)) {
                      return false;
              }
      
              /* Record preemption failure for reporting via lockstat. */
              atomic_or_uint(&l->l_dopreempt, DOPREEMPT_COUNTED);
              lsflag = 0;
              LOCKSTAT_ENTER(lsflag);
              if (__predict_false(lsflag)) {
                      if (where == 0) {
                              where = (uintptr_t)__builtin_return_address(0);
                      }
                      /* Preemption is on, might recurse, so make it atomic. */
                      if (atomic_cas_ptr_ni((void *)&l->l_pfailaddr, NULL,
                          (void *)where) == NULL) {
                              LOCKSTAT_START_TIMER(lsflag, l->l_pfailtime);
                              l->l_pfaillock = failed;
                      }
              }
              LOCKSTAT_EXIT(lsflag);
              return true;
      }
      
      /*
       * Return true if preemption is explicitly disabled.
       */
      bool
      kpreempt_disabled(void)
      {
	const lwp_t *l = curlwp;
      
              return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
	    (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
      }
      
      /*
       * Disable kernel preemption.
       */
      void
      kpreempt_disable(void)
      {
      
	KPREEMPT_DISABLE(curlwp);
      }
      
      /*
       * Reenable kernel preemption.
       */
      void
      kpreempt_enable(void)
      {
      
	KPREEMPT_ENABLE(curlwp);
      }
      
      /*
       * Compute the amount of time during which the current lwp was running.
       *
       * - update l_rtime unless it's an idle lwp.
       */
      
      void
      updatertime(lwp_t *l, const struct bintime *now)
      {
      
              if (__predict_false(l->l_flag & LW_IDLE))
                      return;
      
              /* rtime += now - stime */
              bintime_add(&l->l_rtime, now);
              bintime_sub(&l->l_rtime, &l->l_stime);
      }
      
      /*
 * Select the next LWP to run on the current CPU.
       */
      static inline lwp_t *
      nextlwp(struct cpu_info *ci, struct schedstate_percpu *spc)
      {
              lwp_t *newl;
      
              /*
	 * Let sched_nextlwp() select the LWP to run on the CPU next.
	 * If no LWP is runnable, select the idle LWP.
	 *
	 * Note that spc_lwplock might not necessarily be held, and
	 * the new thread would be unlocked after setting the LWP lock.
               */
              newl = sched_nextlwp();
              if (newl != NULL) {
                      sched_dequeue(newl);
                      KASSERT(lwp_locked(newl, spc->spc_mutex));
                      KASSERT(newl->l_cpu == ci);
                      newl->l_stat = LSONPROC;
                      newl->l_pflag |= LP_RUNNING;
                      lwp_setlock(newl, spc->spc_lwplock);
              } else {
                      newl = ci->ci_data.cpu_idlelwp;
                      newl->l_stat = LSONPROC;
                      newl->l_pflag |= LP_RUNNING;
              }
      
              /*
               * Only clear want_resched if there are no pending (slow)
               * software interrupts.
               */
              ci->ci_want_resched = ci->ci_data.cpu_softints;
              spc->spc_flags &= ~SPCF_SWITCHCLEAR;
              spc->spc_curpriority = lwp_eprio(newl);
      
              return newl;
      }
      
      /*
       * The machine independent parts of context switch.
       *
       * Returns 1 if another LWP was actually run.
       */
      int
      mi_switch(lwp_t *l)
      {
              struct cpu_info *ci;
              struct schedstate_percpu *spc;
              struct lwp *newl;
              int retval, oldspl;
              struct bintime bt;
              bool returning;
      
              KASSERT(lwp_locked(l, NULL));
              KASSERT(kpreempt_disabled());
              LOCKDEBUG_BARRIER(l->l_mutex, 1);
      
              kstack_check_magic(l);
      
              binuptime(&bt);
      
              KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp);
              KASSERT((l->l_pflag & LP_RUNNING) != 0);
              KASSERT(l->l_cpu == curcpu());
              ci = l->l_cpu;
              spc = &ci->ci_schedstate;
              returning = false;
              newl = NULL;
      
              /*
               * If we have been asked to switch to a specific LWP, then there
               * is no need to inspect the run queues.  If a soft interrupt is
               * blocking, then return to the interrupted thread without adjusting
               * VM context or its start time: neither have been changed in order
               * to take the interrupt.
               */
              if (l->l_switchto != NULL) {
                      if ((l->l_pflag & LP_INTR) != 0) {
                              returning = true;
                              softint_block(l);
                              if ((l->l_pflag & LP_TIMEINTR) != 0)
                                      updatertime(l, &bt);
                      }
                      newl = l->l_switchto;
                      l->l_switchto = NULL;
              }
      #ifndef __HAVE_FAST_SOFTINTS
              else if (ci->ci_data.cpu_softints != 0) {
                      /* There are pending soft interrupts, so pick one. */
                      newl = softint_picklwp();
                      newl->l_stat = LSONPROC;
                      newl->l_pflag |= LP_RUNNING;
              }
      #endif        /* !__HAVE_FAST_SOFTINTS */
      
              /* Count time spent in current system call */
              if (!returning) {
                      SYSCALL_TIME_SLEEP(l);
      
                      updatertime(l, &bt);
              }
      
              /* Lock the runqueue */
              KASSERT(l->l_stat != LSRUN);
              mutex_spin_enter(spc->spc_mutex);
      
              /*
               * If on the CPU and we have gotten this far, then we must yield.
               */
              if (l->l_stat == LSONPROC && l != newl) {
                      KASSERT(lwp_locked(l, spc->spc_lwplock));
                      if ((l->l_flag & LW_IDLE) == 0) {
                              l->l_stat = LSRUN;
                              lwp_setlock(l, spc->spc_mutex);
                              sched_enqueue(l, true);
                              /*
                               * Handle migration.  Note that "migrating LWP" may
                               * be reset here, if interrupt/preemption happens
                               * early in idle LWP.
                               */
                              if (l->l_target_cpu != NULL &&
                                  (l->l_pflag & LP_BOUND) == 0) {
                                      KASSERT((l->l_pflag & LP_INTR) == 0);
                                      spc->spc_migrating = l;
                              }
                      } else
                              l->l_stat = LSIDL;
              }
      
              /* Pick new LWP to run. */
              if (newl == NULL) {
                      newl = nextlwp(ci, spc);
              }
      
              /* Items that must be updated with the CPU locked. */
              if (!returning) {
                      /* Update the new LWP's start time. */
                      newl->l_stime = bt;
      
                      /*
                       * ci_curlwp changes when a fast soft interrupt occurs.
                       * We use cpu_onproc to keep track of which kernel or
                       * user thread is running 'underneath' the software
                       * interrupt.  This is important for time accounting,
                       * itimers and forcing user threads to preempt (aston).
                       */
                      ci->ci_data.cpu_onproc = newl;
              }
      
              /*
               * Preemption related tasks.  Must be done with the current
               * CPU locked.
               */
              cpu_did_resched(l);
              l->l_dopreempt = 0;
              if (__predict_false(l->l_pfailaddr != 0)) {
                      LOCKSTAT_FLAG(lsflag);
                      LOCKSTAT_ENTER(lsflag);
                      LOCKSTAT_STOP_TIMER(lsflag, l->l_pfailtime);
                      LOCKSTAT_EVENT_RA(lsflag, l->l_pfaillock, LB_NOPREEMPT|LB_SPIN,
                          1, l->l_pfailtime, l->l_pfailaddr);
                      LOCKSTAT_EXIT(lsflag);
                      l->l_pfailtime = 0;
                      l->l_pfaillock = 0;
                      l->l_pfailaddr = 0;
              }
      
              if (l != newl) {
                      struct lwp *prevlwp;
      
                      /* Release all locks, but leave the current LWP locked */
                      if (l->l_mutex == spc->spc_mutex) {
                              /*
                               * Drop spc_lwplock, if the current LWP has been moved
                               * to the run queue (it is now locked by spc_mutex).
                               */
                              mutex_spin_exit(spc->spc_lwplock);
                      } else {
                              /*
                               * Otherwise, drop the spc_mutex, we are done with the
                               * run queues.
                               */
                              mutex_spin_exit(spc->spc_mutex);
                      }
      
                      /*
                       * Mark that context switch is going to be performed
                       * for this LWP, to protect it from being switched
                       * to on another CPU.
                       */
                      KASSERT(l->l_ctxswtch == 0);
                      l->l_ctxswtch = 1;
                      l->l_ncsw++;
                      if ((l->l_pflag & LP_PREEMPTING) != 0)
                              l->l_nivcsw++;
                      l->l_pflag &= ~LP_PREEMPTING;
                      KASSERT((l->l_pflag & LP_RUNNING) != 0);
                      l->l_pflag &= ~LP_RUNNING;
      
                      /*
                       * Increase the count of spin-mutexes before the release
                       * of the last lock - we must remain at IPL_SCHED during
                       * the context switch.
                       */
                      KASSERTMSG(ci->ci_mtx_count == -1,
                          "%s: cpu%u: ci_mtx_count (%d) != -1 "
                          "(block with spin-mutex held)",
                           __func__, cpu_index(ci), ci->ci_mtx_count);
                      oldspl = MUTEX_SPIN_OLDSPL(ci);
                      ci->ci_mtx_count--;
                      lwp_unlock(l);
      
                      /* Count the context switch on this CPU. */
                      ci->ci_data.cpu_nswtch++;
      
                      /* Update status for lwpctl, if present. */
                      if (l->l_lwpctl != NULL)
                              l->l_lwpctl->lc_curcpu = LWPCTL_CPU_NONE;
      
                      /*
                       * Save old VM context, unless a soft interrupt
                       * handler is blocking.
                       */
                      if (!returning)
                              pmap_deactivate(l);
      
                      /*
                       * We may need to spin-wait if 'newl' is still
                       * context switching on another CPU.
                       */
                      if (__predict_false(newl->l_ctxswtch != 0)) {
                              u_int count;
                              count = SPINLOCK_BACKOFF_MIN;
                              while (newl->l_ctxswtch)
                                      SPINLOCK_BACKOFF(count);
                      }
      
                      /*
                       * If DTrace has set the active vtime enum to anything
                       * other than INACTIVE (0), then it should have set the
                       * function to call.
                       */
                      if (__predict_false(dtrace_vtime_active)) {
                              (*dtrace_vtime_switch_func)(newl);
                      }
      
                      /*
		 * We must not come here from inside a pserialize read section.
                       */
                      KASSERT(pserialize_not_in_read_section());
      
                      /* Switch to the new LWP.. */
      #ifdef MULTIPROCESSOR
                      KASSERT(curlwp == ci->ci_curlwp);
      #endif
                      KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp);
                      prevlwp = cpu_switchto(l, newl, returning);
                      ci = curcpu();
      #ifdef MULTIPROCESSOR
                      KASSERT(curlwp == ci->ci_curlwp);
      #endif
                      KASSERTMSG(l == curlwp, "l %p curlwp %p prevlwp %p",
                          l, curlwp, prevlwp);
      
                      /*
                       * Switched away - we have new curlwp.
                       * Restore VM context and IPL.
                       */
                      pmap_activate(l);
                      pcu_switchpoint(l);
      
                      if (prevlwp != NULL) {
                              /* Normalize the count of the spin-mutexes */
                              ci->ci_mtx_count++;
                              /* Unmark the state of context switch */
                              membar_exit();
                              prevlwp->l_ctxswtch = 0;
                      }
      
                      /* Update status for lwpctl, if present. */
                      if (l->l_lwpctl != NULL) {
                              l->l_lwpctl->lc_curcpu = (int)cpu_index(ci);
                              l->l_lwpctl->lc_pctr++;
                      }
      
                      /* Note trip through cpu_switchto(). */
                      pserialize_switchpoint();
      
                      KASSERT(l->l_cpu == ci);
                      splx(oldspl);
                      /*
                       * note that, unless the caller disabled preemption,
                       * we can be preempted at any time after the above splx() call.
                       */
                      retval = 1;
              } else {
                      /* Nothing to do - just unlock and return. */
                      pserialize_switchpoint();
                      mutex_spin_exit(spc->spc_mutex);
                      l->l_pflag &= ~LP_PREEMPTING;
                      lwp_unlock(l);
                      retval = 0;
              }
      
              KASSERT(l == curlwp);
              KASSERT(l->l_stat == LSONPROC);
      
              SYSCALL_TIME_WAKEUP(l);
              LOCKDEBUG_BARRIER(NULL, 1);
      
              return retval;
      }
      
      /*
       * The machine independent parts of context switch to oblivion.
       * Does not return.  Call with the LWP unlocked.
       */
      void
      lwp_exit_switchaway(lwp_t *l)
      {
              struct cpu_info *ci;
              struct lwp *newl;
              struct bintime bt;
      
              ci = l->l_cpu;
      
              KASSERT(kpreempt_disabled());
              KASSERT(l->l_stat == LSZOMB || l->l_stat == LSIDL);
              KASSERT(ci == curcpu());
              LOCKDEBUG_BARRIER(NULL, 0);
      
              kstack_check_magic(l);
      
              /* Count time spent in current system call */
              SYSCALL_TIME_SLEEP(l);
              binuptime(&bt);
              updatertime(l, &bt);
      
              /* Must stay at IPL_SCHED even after releasing run queue lock. */
              (void)splsched();
      
              /*
               * Let sched_nextlwp() select the LWP to run the CPU next.
               * If no LWP is runnable, select the idle LWP.
               * 
	 * Note that spc_lwplock might not necessarily be held, and
	 * that the new thread will be unlocked after setting the LWP-lock.
               */
              spc_lock(ci);
      #ifndef __HAVE_FAST_SOFTINTS
              if (ci->ci_data.cpu_softints != 0) {
                      /* There are pending soft interrupts, so pick one. */
                      newl = softint_picklwp();
                      newl->l_stat = LSONPROC;
                      newl->l_pflag |= LP_RUNNING;
              } else 
      #endif        /* !__HAVE_FAST_SOFTINTS */
              {
                      newl = nextlwp(ci, &ci->ci_schedstate);
              }
      
              /* Update the new LWP's start time. */
              newl->l_stime = bt;
              l->l_pflag &= ~LP_RUNNING;
      
              /*
               * ci_curlwp changes when a fast soft interrupt occurs.
               * We use cpu_onproc to keep track of which kernel or
               * user thread is running 'underneath' the software
               * interrupt.  This is important for time accounting,
               * itimers and forcing user threads to preempt (aston).
               */
              ci->ci_data.cpu_onproc = newl;
      
              /*
               * Preemption related tasks.  Must be done with the current
               * CPU locked.
               */
              cpu_did_resched(l);
      
              /* Unlock the run queue. */
              spc_unlock(ci);
      
              /* Count the context switch on this CPU. */
              ci->ci_data.cpu_nswtch++;
      
              /* Update status for lwpctl, if present. */
              if (l->l_lwpctl != NULL)
                      l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED;
      
              /*
               * We may need to spin-wait if 'newl' is still
               * context switching on another CPU.
               */
              if (__predict_false(newl->l_ctxswtch != 0)) {
                      u_int count;
                      count = SPINLOCK_BACKOFF_MIN;
                      while (newl->l_ctxswtch)
                              SPINLOCK_BACKOFF(count);
              }
      
              /*
               * If DTrace has set the active vtime enum to anything
               * other than INACTIVE (0), then it should have set the
               * function to call.
               */
              if (__predict_false(dtrace_vtime_active)) {
                      (*dtrace_vtime_switch_func)(newl);
              }
      
              /* Switch to the new LWP.. */
              (void)cpu_switchto(NULL, newl, false);
      
              for (;;) continue;        /* XXX: convince gcc about "noreturn" */
              /* NOTREACHED */
      }
      
      /*
       * setrunnable: change LWP state to be runnable, placing it on the run queue.
       *
       * Call with the process and LWP locked.  Will return with the LWP unlocked.
       */
      void
      setrunnable(struct lwp *l)
      {
              struct proc *p = l->l_proc;
              struct cpu_info *ci;
      
              KASSERT((l->l_flag & LW_IDLE) == 0);
              KASSERT(mutex_owned(p->p_lock));
              KASSERT(lwp_locked(l, NULL));
              KASSERT(l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex);
      
              switch (l->l_stat) {
              case LSSTOP:
                      /*
                       * If we're being traced (possibly because someone attached us
                       * while we were stopped), check for a signal from the debugger.
                       */
                      if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xsig != 0)
                              signotify(l);
                      p->p_nrlwps++;
                      break;
              case LSSUSPENDED:
                      l->l_flag &= ~LW_WSUSPEND;
                      p->p_nrlwps++;
                      cv_broadcast(&p->p_lwpcv);
                      break;
              case LSSLEEP:
                      KASSERT(l->l_wchan != NULL);
                      break;
              default:
                      panic("setrunnable: lwp %p state was %d", l, l->l_stat);
              }
      
              /*
               * If the LWP was sleeping, start it again.
               */
              if (l->l_wchan != NULL) {
                      l->l_stat = LSSLEEP;
                      /* lwp_unsleep() will release the lock. */
                      lwp_unsleep(l, true);
                      return;
              }
      
              /*
               * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
               * about to call mi_switch(), in which case it will yield.
               */
              if ((l->l_pflag & LP_RUNNING) != 0) {
                      l->l_stat = LSONPROC;
                      l->l_slptime = 0;
                      lwp_unlock(l);
                      return;
              }
      
              /*
               * Look for a CPU to run.
               * Set the LWP runnable.
               */
              ci = sched_takecpu(l);
              l->l_cpu = ci;
              spc_lock(ci);
              lwp_unlock_to(l, ci->ci_schedstate.spc_mutex);
              sched_setrunnable(l);
              l->l_stat = LSRUN;
              l->l_slptime = 0;
      
              sched_enqueue(l, false);
              resched_cpu(l);
              lwp_unlock(l);
      }
      
      /*
       * suspendsched:
       *
       *        Convert all non-LW_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED. 
       */
      void
      suspendsched(void)
      {
              CPU_INFO_ITERATOR cii;
              struct cpu_info *ci;
              struct lwp *l;
              struct proc *p;
      
              /*
               * We do this by process in order not to violate the locking rules.
               */
              mutex_enter(proc_lock);
              PROCLIST_FOREACH(p, &allproc) {
                      mutex_enter(p->p_lock);
                      if ((p->p_flag & PK_SYSTEM) != 0) {
                              mutex_exit(p->p_lock);
                              continue;
                      }
      
                      if (p->p_stat != SSTOP) {
                              if (p->p_stat != SZOMB && p->p_stat != SDEAD) {
                                      p->p_pptr->p_nstopchild++;
                                      p->p_waited = 0;
                              }
                              p->p_stat = SSTOP;
                      }
      
                      LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                              if (l == curlwp)
                                      continue;
      
                              lwp_lock(l);
      
                              /*
			 * Set LW_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * get as many LWPs as possible to the user/kernel
			 * boundary, so that they will release any locks
			 * that they hold.
                               */
                              l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
      
                              if (l->l_stat == LSSLEEP &&
                                  (l->l_flag & LW_SINTR) != 0) {
                                      /* setrunnable() will release the lock. */
                                      setrunnable(l);
                                      continue;
                              }
      
                              lwp_unlock(l);
                      }
      
                      mutex_exit(p->p_lock);
              }
              mutex_exit(proc_lock);
      
              /*
               * Kick all CPUs to make them preempt any LWPs running in user mode. 
               * They'll trap into the kernel and suspend themselves in userret().
               */
              for (CPU_INFO_FOREACH(cii, ci)) {
                      spc_lock(ci);
                      cpu_need_resched(ci, RESCHED_IMMED);
                      spc_unlock(ci);
              }
      }
      
      /*
       * sched_unsleep:
       *
 *	This is called when the LWP has not been awoken normally but instead
       *        interrupted: for example, if the sleep timed out.  Because of this,
       *        it's not a valid action for running or idle LWPs.
       */
      static void
      sched_unsleep(struct lwp *l, bool cleanup)
      {
      
              lwp_unlock(l);
              panic("sched_unsleep");
      }
      
      static void
      resched_cpu(struct lwp *l)
      {
              struct cpu_info *ci = l->l_cpu;
      
              KASSERT(lwp_locked(l, NULL));
              if (lwp_eprio(l) > ci->ci_schedstate.spc_curpriority)
                      cpu_need_resched(ci, 0);
      }
      
      static void
      sched_changepri(struct lwp *l, pri_t pri)
      {
      
              KASSERT(lwp_locked(l, NULL));
      
              if (l->l_stat == LSRUN) {
                      KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
                      sched_dequeue(l);
                      l->l_priority = pri;
                      sched_enqueue(l, false);
              } else {
                      l->l_priority = pri;
              }
              resched_cpu(l);
      }
      
      static void
      sched_lendpri(struct lwp *l, pri_t pri)
      {
      
              KASSERT(lwp_locked(l, NULL));
      
              if (l->l_stat == LSRUN) {
                      KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
                      sched_dequeue(l);
                      l->l_inheritedprio = pri;
                      l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
                      sched_enqueue(l, false);
              } else {
                      l->l_inheritedprio = pri;
                      l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
              }
              resched_cpu(l);
      }
      
      struct lwp *
      syncobj_noowner(wchan_t wchan)
      {
      
              return NULL;
      }
      
      /* Decay 95% of proc::p_pctcpu in 60 seconds, ccpu = exp(-1/20) */
      const fixpt_t ccpu = 0.95122942450071400909 * FSCALE;
      
      /*
       * Constants for averages over 1, 5 and 15 minutes when sampling at
       * 5 second intervals.
       */
      static const fixpt_t cexp[ ] = {
              0.9200444146293232 * FSCALE,        /* exp(-1/12) */
              0.9834714538216174 * FSCALE,        /* exp(-1/60) */
              0.9944598480048967 * FSCALE,        /* exp(-1/180) */
      };
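
/*
 * Editor's note: the constants above encode exponential decay in the
 * kernel's fixpt_t format (values scaled by FSCALE).  sched_pstats()
 * runs once per second, so multiplying by ccpu = exp(-1/20) each tick
 * leaves exp(-3), roughly 5%, of the original p_pctcpu after 60 seconds.
 * The load averages are sampled every 5 seconds, so the 1, 5 and 15
 * minute windows correspond to 12, 60 and 180 samples, hence exp(-1/12),
 * exp(-1/60) and exp(-1/180).  The sketch below is a standalone userland
 * demonstration of the same fixed-point recurrences; it is not part of
 * the kernel build, and the local FSHIFT value of 11 (the traditional
 * BSD definition) is an assumption of the example.
 */
#if 0	/* illustrative sketch only; compile in userland with -lm */
#include <math.h>
#include <stdio.h>

#define EX_FSHIFT	11			/* assumed; kernel uses FSHIFT */
#define EX_FSCALE	(1 << EX_FSHIFT)

typedef unsigned int ex_fixpt_t;

int
main(void)
{
	/* c_pctcpu = exp(-1/20), applied once per simulated second. */
	const ex_fixpt_t c_pctcpu = (ex_fixpt_t)(exp(-1.0 / 20.0) * EX_FSCALE);
	/* c_load1 = exp(-1/12), applied once per simulated 5-second sample. */
	const ex_fixpt_t c_load1 = (ex_fixpt_t)(exp(-1.0 / 12.0) * EX_FSCALE);
	ex_fixpt_t pctcpu = EX_FSCALE;		/* start at "100%" */
	ex_fixpt_t ldavg = 0;
	const int nrun = 2;			/* pretend 2 runnable LWPs */
	int i;

	/* After 60 decays only about exp(-3), roughly 5%, remains. */
	for (i = 0; i < 60; i++)
		pctcpu = (pctcpu * c_pctcpu) >> EX_FSHIFT;
	printf("p_pctcpu remaining after 60s: %.1f%%\n",
	    100.0 * pctcpu / EX_FSCALE);

	/* Same update as the load-average loop in sched_pstats(). */
	for (i = 0; i < 60; i++)		/* five minutes of samples */
		ldavg = (c_load1 * ldavg +
		    nrun * EX_FSCALE * (EX_FSCALE - c_load1)) >> EX_FSHIFT;
	printf("1-minute load converges toward %.2f\n",
	    (double)ldavg / EX_FSCALE);
	return 0;
}
#endif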
      
      /*
       * sched_pstats:
       *
       * => Update process statistics and check CPU resource allocation.
       * => Call scheduler-specific hook to eventually adjust LWP priorities.
 * => Compute the load averages over 1, 5 and 15 minute intervals.
       */
      void
      sched_pstats(void)
      {
              extern struct loadavg averunnable;
              struct loadavg *avg = &averunnable;
              const int clkhz = (stathz != 0 ? stathz : hz);
              static bool backwards = false;
              static u_int lavg_count = 0;
              struct proc *p;
              int nrun;
      
              sched_pstats_ticks++;
              if (++lavg_count >= 5) {
                      lavg_count = 0;
                      nrun = 0;
              }
              mutex_enter(proc_lock);
              PROCLIST_FOREACH(p, &allproc) {
                      struct lwp *l;
                      struct rlimit *rlim;
                      time_t runtm;
                      int sig;
      
                      /* Increment sleep time (if sleeping), ignore overflow. */
                      mutex_enter(p->p_lock);
                      runtm = p->p_rtime.sec;
                      LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                              fixpt_t lpctcpu;
                              u_int lcpticks;
      
                              if (__predict_false((l->l_flag & LW_IDLE) != 0))
                                      continue;
                              lwp_lock(l);
                              runtm += l->l_rtime.sec;
                              l->l_swtime++;
                              sched_lwp_stats(l);
      
                              /* For load average calculation. */
                              if (__predict_false(lavg_count == 0) &&
                                  (l->l_flag & (LW_SINTR | LW_SYSTEM)) == 0) {
                                      switch (l->l_stat) {
                                      case LSSLEEP:
                                              if (l->l_slptime > 1) {
                                                      break;
                                              }
                                              /* FALLTHROUGH */
                                      case LSRUN:
                                      case LSONPROC:
                                      case LSIDL:
                                              nrun++;
                                      }
                              }
                              lwp_unlock(l);
      
                              l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
                              if (l->l_slptime != 0)
                                      continue;
      
                              lpctcpu = l->l_pctcpu;
                              lcpticks = atomic_swap_uint(&l->l_cpticks, 0);
                              lpctcpu += ((FSCALE - ccpu) *
                                  (lcpticks * FSCALE / clkhz)) >> FSHIFT;
                              l->l_pctcpu = lpctcpu;
                      }
                      /* Calculating p_pctcpu only for ps(1) */
                      p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
      
                      if (__predict_false(runtm < 0)) {
                              if (!backwards) {
                                      backwards = true;
                                      printf("WARNING: negative runtime; "
                                          "monotonic clock has gone backwards\n");
                              }
                              mutex_exit(p->p_lock);
                              continue;
                      }
      
                      /*
                       * Check if the process exceeds its CPU resource allocation.
                       * If over the hard limit, kill it with SIGKILL.
                       * If over the soft limit, send SIGXCPU and raise
                       * the soft limit a little.
                       */
                      rlim = &p->p_rlimit[RLIMIT_CPU];
                      sig = 0;
                      if (__predict_false(runtm >= rlim->rlim_cur)) {
                              if (runtm >= rlim->rlim_max) {
                                      sig = SIGKILL;
                                      log(LOG_NOTICE,
                                          "pid %d, command %s, is killed: %s\n",
                                          p->p_pid, p->p_comm, "exceeded RLIMIT_CPU");
                                      uprintf("pid %d, command %s, is killed: %s\n",
                                          p->p_pid, p->p_comm, "exceeded RLIMIT_CPU");
                              } else {
                                      sig = SIGXCPU;
                                      if (rlim->rlim_cur < rlim->rlim_max)
                                              rlim->rlim_cur += 5;
                              }
                      }
                      mutex_exit(p->p_lock);
                      if (__predict_false(sig)) {
                              KASSERT((p->p_flag & PK_SYSTEM) == 0);
                              psignal(p, sig);
                      }
              }
              mutex_exit(proc_lock);
      
              /* Load average calculation. */
              if (__predict_false(lavg_count == 0)) {
                      int i;
                      CTASSERT(__arraycount(cexp) == __arraycount(avg->ldavg));
                      for (i = 0; i < __arraycount(cexp); i++) {
                              avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
                                  nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
                      }
              }
      
              /* Lightning bolt. */
              cv_broadcast(&lbolt);
      }
      /*        $NetBSD: sys_syscall.c,v 1.12 2018/12/02 21:00:13 maxv Exp $        */
      
      /*-
       * Copyright (c) 2006 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by David Laight.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: sys_syscall.c,v 1.12 2018/12/02 21:00:13 maxv Exp $");
      
      #include <sys/syscall_stats.h>
      #include <sys/syscallvar.h>
      
      /*
       * MI indirect system call support.
       * Included from sys_indirect.c and compat/netbsd32/netbsd32_indirect.c
       *
       * SYS_SYSCALL is set to the required function name.
       */
      
      #define CONCAT(a,b) __CONCAT(a,b)
      
      int
      SYS_SYSCALL(struct lwp *l, const struct CONCAT(SYS_SYSCALL, _args) *uap,
          register_t *rval)
      {
              /* {
                      syscallarg(int) code;
                      syscallarg(register_t) args[SYS_MAXSYSARGS];
              } */
              const struct sysent *callp;
	struct proc *p = l->l_proc;
              int code;
              int error;
      #ifdef NETBSD32_SYSCALL
              register_t args64[SYS_MAXSYSARGS];
              int i, narg;
              #define TRACE_ARGS args64
      #else
              #define TRACE_ARGS &SCARG(uap, args[0])
      #endif
      
              callp = p->p_emul->e_sysent;
      
              code = SCARG(uap, code) & (SYS_NSYSENT - 1);
              SYSCALL_COUNT(syscall_counts, code);
              callp += code;
      
              if (__predict_false(callp->sy_flags & SYCALL_INDIRECT))
                      return ENOSYS;
      
	if (__predict_true(!p->p_trace_enabled))
		return sy_call(callp, l, &uap->args, rval);
      
      #ifdef NETBSD32_SYSCALL
              narg = callp->sy_narg;
              for (i = 0; i < narg; i++)
                      args64[i] = SCARG(uap, args[i]);
      #endif
      
              error = trace_enter(code, callp, TRACE_ARGS);
              if (__predict_false(error != 0))
                      return error;
              kleak_fill_stack();
              error = sy_call(callp, l, &uap->args, rval);
              trace_exit(code, callp, &uap->args, rval, error);
              return error;
      
              #undef TRACE_ARGS
      }
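
/*
 * Editor's note: a minimal userland sketch of the indirection handled
 * above.  syscall(2) is itself system call number 0; its first argument
 * is the number of the call to dispatch, which the code above looks up
 * in the emulation's sysent table.  getpid() is used only because it
 * takes no arguments; the sketch is not part of the kernel build.
 */
#if 0	/* illustrative sketch only; compile as a normal userland program */
#include <sys/syscall.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Equivalent to getpid(), but routed through the indirect syscall. */
	int pid = syscall(SYS_getpid);

	printf("pid via indirect syscall: %d\n", pid);
	return 0;
}
#endif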
      /*        $NetBSD: procfs_vfsops.c,v 1.101 2019/03/30 23:28:30 christos Exp $        */
      
      /*
       * Copyright (c) 1993
       *        The Regents of the University of California.  All rights reserved.
       *
       * This code is derived from software contributed to Berkeley by
       * Jan-Simon Pendry.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)procfs_vfsops.c        8.7 (Berkeley) 5/10/95
       */
      
      /*
       * Copyright (c) 1993 Jan-Simon Pendry
       *
       * This code is derived from software contributed to Berkeley by
       * Jan-Simon Pendry.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. All advertising materials mentioning features or use of this software
       *    must display the following acknowledgement:
       *        This product includes software developed by the University of
       *        California, Berkeley and its contributors.
       * 4. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)procfs_vfsops.c        8.7 (Berkeley) 5/10/95
       */
      
      /*
       * procfs VFS interface
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: procfs_vfsops.c,v 1.101 2019/03/30 23:28:30 christos Exp $");
      
      #if defined(_KERNEL_OPT)
      #include "opt_compat_netbsd.h"
      #endif
      
      #include <sys/param.h>
      #include <sys/time.h>
      #include <sys/kernel.h>
      #include <sys/systm.h>
      #include <sys/sysctl.h>
      #include <sys/proc.h>
      #include <sys/buf.h>
      #include <sys/syslog.h>
      #include <sys/mount.h>
      #include <sys/dirent.h>
      #include <sys/signalvar.h>
      #include <sys/vnode.h>
      #include <sys/file.h>
      #include <sys/filedesc.h>
      #include <sys/kauth.h>
      #include <sys/module.h>
      
      #include <miscfs/genfs/genfs.h>
      
      #include <miscfs/procfs/procfs.h>
      
      #include <uvm/uvm_extern.h>                        /* for PAGE_SIZE */
      
      MODULE(MODULE_CLASS_VFS, procfs, "ptrace_common");
      
      VFS_PROTOS(procfs);
      
      static struct sysctllog *procfs_sysctl_log;
      
      static kauth_listener_t procfs_listener;
      
      /*
       * VFS Operations.
       *
       * mount system call
       */
      /* ARGSUSED */
      int
      procfs_mount(
          struct mount *mp,
          const char *path,
          void *data,
          size_t *data_len)
      {
              struct lwp *l = curlwp;
              struct procfsmount *pmnt;
              struct procfs_args *args = data;
              int error;
      
              if (args == NULL)
                      return EINVAL;
      
              if (UIO_MX & (UIO_MX-1)) {
		log(LOG_ERR, "procfs: invalid directory entry size\n");
                      return (EINVAL);
              }
      
              if (mp->mnt_flag & MNT_GETARGS) {
                      if (*data_len < sizeof *args)
                              return EINVAL;
      
                      pmnt = VFSTOPROC(mp);
                      if (pmnt == NULL)
                              return EIO;
                      args->version = PROCFS_ARGSVERSION;
                      args->flags = pmnt->pmnt_flags;
                      *data_len = sizeof *args;
                      return 0;
              }
      
              if (mp->mnt_flag & MNT_UPDATE)
                      return (EOPNOTSUPP);
      
              if (*data_len >= sizeof *args && args->version != PROCFS_ARGSVERSION)
                      return EINVAL;
      
              pmnt = kmem_zalloc(sizeof(struct procfsmount), KM_SLEEP);
      
              mp->mnt_stat.f_namemax = PROCFS_MAXNAMLEN;
              mp->mnt_flag |= MNT_LOCAL;
              mp->mnt_data = pmnt;
              vfs_getnewfsid(mp);
      
              error = set_statvfs_info(path, UIO_USERSPACE, "procfs", UIO_SYSSPACE,
                  mp->mnt_op->vfs_name, mp, l);
              pmnt->pmnt_exechook = exechook_establish(procfs_revoke_vnodes, mp);
              if (*data_len >= sizeof *args)
                      pmnt->pmnt_flags = args->flags;
              else
                      pmnt->pmnt_flags = 0;
      
              mp->mnt_iflag |= IMNT_MPSAFE;
              return error;
      }
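
/*
 * Editor's note: a minimal userland sketch of how mount(2) delivers the
 * procfs_args structure consumed by procfs_mount() above.  The "/proc"
 * mount point and the zero flags word are assumptions of the example
 * (mount_procfs(8) performs the equivalent); the sketch is not part of
 * the kernel build.
 */
#if 0	/* illustrative sketch only; compile as a normal userland program */
#include <sys/param.h>
#include <sys/mount.h>
#include <miscfs/procfs/procfs.h>
#include <err.h>

int
main(void)
{
	struct procfs_args args = {
		.version = PROCFS_ARGSVERSION,
		.flags = 0,
	};

	/* procfs_mount() validates args->version and copies args->flags. */
	if (mount(MOUNT_PROCFS, "/proc", 0, &args, sizeof(args)) == -1)
		err(1, "mount procfs");
	return 0;
}
#endif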
      
      /*
       * unmount system call
       */
      int
      procfs_unmount(struct mount *mp, int mntflags)
      {
              int error;
              int flags = 0;
      
              if (mntflags & MNT_FORCE)
                      flags |= FORCECLOSE;
      
              if ((error = vflush(mp, 0, flags)) != 0)
                      return (error);
      
              exechook_disestablish(VFSTOPROC(mp)->pmnt_exechook);
      
              kmem_free(mp->mnt_data, sizeof(struct procfsmount));
              mp->mnt_data = NULL;
      
              return 0;
      }
      
      int
      procfs_root(struct mount *mp, struct vnode **vpp)
      {
              int error;
      
              error = procfs_allocvp(mp, vpp, 0, PFSroot, -1);
              if (error == 0) {
                      error = vn_lock(*vpp, LK_EXCLUSIVE);
                      if (error != 0) {
                              vrele(*vpp);
                              *vpp = NULL;
                      }
              }
      
              return error;
      }
      
      /* ARGSUSED */
      int
      procfs_start(struct mount *mp, int flags)
      {
      
              return (0);
      }
      
      /*
       * Get file system statistics.
       */
      int
      procfs_statvfs(struct mount *mp, struct statvfs *sbp)
      {
      
              genfs_statvfs(mp, sbp);
      
              sbp->f_bsize = PAGE_SIZE;
              sbp->f_frsize = PAGE_SIZE;
              sbp->f_iosize = PAGE_SIZE;
              sbp->f_blocks = 1;
              sbp->f_files = maxproc;                        /* approx */
              sbp->f_ffree = maxproc - nprocs;        /* approx */
              sbp->f_favail = maxproc - nprocs;        /* approx */
      
              return (0);
      }
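
/*
 * Editor's note: the f_files/f_ffree values above are only the maxproc
 * approximations noted in the comments.  A userland statvfs(2) call on a
 * mounted procfs observes exactly those numbers; the "/proc" path is an
 * assumption of the example and the sketch is not part of the build.
 */
#if 0	/* illustrative sketch only; compile as a normal userland program */
#include <sys/statvfs.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	struct statvfs sv;

	if (statvfs("/proc", &sv) == -1)
		err(1, "statvfs /proc");
	printf("f_files (approx. maxproc): %ju, f_ffree: %ju\n",
	    (uintmax_t)sv.f_files, (uintmax_t)sv.f_ffree);
	return 0;
}
#endif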
      
      /*ARGSUSED*/
      int
      procfs_sync(
          struct mount *mp,
          int waitfor,
          kauth_cred_t uc)
      {
      
              return (0);
      }
      
      /*ARGSUSED*/
      int
      procfs_vget(struct mount *mp, ino_t ino,
          struct vnode **vpp)
      {
              return (EOPNOTSUPP);
      }
      
      int
      procfs_loadvnode(struct mount *mp, struct vnode *vp,
          const void *key, size_t key_len, const void **new_key)
      {
              int error;
              struct pfskey pfskey;
              struct pfsnode *pfs;
      
              KASSERT(key_len == sizeof(pfskey));
              memcpy(&pfskey, key, key_len);
      
              pfs = kmem_alloc(sizeof(*pfs), KM_SLEEP);
              pfs->pfs_pid = pfskey.pk_pid;
              pfs->pfs_type = pfskey.pk_type;
              pfs->pfs_fd = pfskey.pk_fd;
              pfs->pfs_vnode = vp;
              pfs->pfs_flags = 0;
              pfs->pfs_fileno =
                  PROCFS_FILENO(pfs->pfs_pid, pfs->pfs_type, pfs->pfs_fd);
              vp->v_tag = VT_PROCFS;
              vp->v_op = procfs_vnodeop_p;
              vp->v_data = pfs;
      
              switch (pfs->pfs_type) {
              case PFSroot:        /* /proc = dr-xr-xr-x */
                      vp->v_vflag |= VV_ROOT;
                      /*FALLTHROUGH*/
              case PFSproc:        /* /proc/N = dr-xr-xr-x */
                      pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
                      vp->v_type = VDIR;
                      break;
      
              case PFStask:        /* /proc/N/task = dr-xr-xr-x */
                      if (pfs->pfs_fd == -1) {
                              pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|
                                  S_IROTH|S_IXOTH;
                              vp->v_type = VDIR;
                              break;
                      }
                      /*FALLTHROUGH*/
              case PFScurproc:        /* /proc/curproc = lr-xr-xr-x */
              case PFSself:        /* /proc/self    = lr-xr-xr-x */
              case PFScwd:        /* /proc/N/cwd = lr-xr-xr-x */
              case PFSchroot:        /* /proc/N/chroot = lr-xr-xr-x */
              case PFSexe:        /* /proc/N/exe = lr-xr-xr-x */
                      pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
                      vp->v_type = VLNK;
                      break;
      
              case PFSfd:
                      if (pfs->pfs_fd == -1) {        /* /proc/N/fd = dr-x------ */
                              pfs->pfs_mode = S_IRUSR|S_IXUSR;
                              vp->v_type = VDIR;
                      } else {        /* /proc/N/fd/M = [ps-]rw------- */
                              file_t *fp;
                              vnode_t *vxp;
                              struct proc *p;
      
                              mutex_enter(proc_lock);
                              p = proc_find(pfs->pfs_pid);
                              mutex_exit(proc_lock);
                              if (p == NULL) {
                                      error = ENOENT;
                                      goto bad;
                              }
                              KASSERT(rw_read_held(&p->p_reflock));
                              if ((fp = fd_getfile2(p, pfs->pfs_fd)) == NULL) {
                                      error = EBADF;
                                      goto bad;
                              }
      
                              pfs->pfs_mode = S_IRUSR|S_IWUSR;
                              switch (fp->f_type) {
                              case DTYPE_VNODE:
                                      vxp = fp->f_vnode;
      
                                      /*
                                       * We make symlinks for directories
                                       * to avoid cycles.
                                       */
                                      if (vxp->v_type == VDIR)
                                              goto symlink;
                                      vp->v_type = vxp->v_type;
                                      break;
                              case DTYPE_PIPE:
                                      vp->v_type = VFIFO;
                                      break;
                              case DTYPE_SOCKET:
                                      vp->v_type = VSOCK;
                                      break;
                              case DTYPE_KQUEUE:
                              case DTYPE_MISC:
                              case DTYPE_SEM:
                              symlink:
                                      pfs->pfs_mode = S_IRUSR|S_IXUSR|S_IRGRP|
                                          S_IXGRP|S_IROTH|S_IXOTH;
                                      vp->v_type = VLNK;
                                      break;
                              default:
                                      error = EOPNOTSUPP;
                                      closef(fp);
                                      goto bad;
                              }
                              closef(fp);
                      }
                      break;
      
              case PFSfile:        /* /proc/N/file = -rw------- */
              case PFSmem:        /* /proc/N/mem = -rw------- */
              case PFSregs:        /* /proc/N/regs = -rw------- */
              case PFSfpregs:        /* /proc/N/fpregs = -rw------- */
                      pfs->pfs_mode = S_IRUSR|S_IWUSR;
                      vp->v_type = VREG;
                      break;
      
              case PFSnote:        /* /proc/N/note = --w------ */
              case PFSnotepg:        /* /proc/N/notepg = --w------ */
                      pfs->pfs_mode = S_IWUSR;
                      vp->v_type = VREG;
                      break;
      
              case PFSmap:                /* /proc/N/map = -r-------- */
              case PFSmaps:                /* /proc/N/maps = -r-------- */
              case PFSauxv:                /* /proc/N/auxv = -r-------- */
                      pfs->pfs_mode = S_IRUSR;
                      vp->v_type = VREG;
                      break;
      
              case PFSstatus:                /* /proc/N/status = -r--r--r-- */
              case PFSstat:                /* /proc/N/stat = -r--r--r-- */
              case PFScmdline:        /* /proc/N/cmdline = -r--r--r-- */
              case PFSenviron:        /* /proc/N/environ = -r--r--r-- */
              case PFSemul:                /* /proc/N/emul = -r--r--r-- */
              case PFSmeminfo:        /* /proc/meminfo = -r--r--r-- */
              case PFScpustat:        /* /proc/stat = -r--r--r-- */
              case PFSdevices:        /* /proc/devices = -r--r--r-- */
              case PFScpuinfo:        /* /proc/cpuinfo = -r--r--r-- */
              case PFSuptime:                /* /proc/uptime = -r--r--r-- */
              case PFSmounts:                /* /proc/mounts = -r--r--r-- */
              case PFSloadavg:        /* /proc/loadavg = -r--r--r-- */
              case PFSstatm:                /* /proc/N/statm = -r--r--r-- */
              case PFSversion:        /* /proc/version = -r--r--r-- */
              case PFSlimit:                /* /proc/limit = -r--r--r-- */
                      pfs->pfs_mode = S_IRUSR|S_IRGRP|S_IROTH;
                      vp->v_type = VREG;
                      break;
      
      #ifdef __HAVE_PROCFS_MACHDEP
              PROCFS_MACHDEP_NODETYPE_CASES
                      procfs_machdep_allocvp(vp);
                      break;
      #endif
      
              default:
                      panic("procfs_allocvp");
              }
      
              uvm_vnp_setsize(vp, 0);
              *new_key = &pfs->pfs_key;
      
              return 0;
      
      bad:
	vp->v_tag = VT_NON;
              vp->v_type = VNON;
              vp->v_op = NULL;
              vp->v_data = NULL;
              kmem_free(pfs, sizeof(*pfs));
              return error;
      }
      
      void
      procfs_init(void)
      {
      
      }
      
      void
      procfs_reinit(void)
      {
      
      }
      
      void
      procfs_done(void)
      {
      
      }
      
      extern const struct vnodeopv_desc procfs_vnodeop_opv_desc;
      
      const struct vnodeopv_desc * const procfs_vnodeopv_descs[] = {
              &procfs_vnodeop_opv_desc,
              NULL,
      };
      
      struct vfsops procfs_vfsops = {
              .vfs_name = MOUNT_PROCFS,
              .vfs_min_mount_data = sizeof (struct procfs_args),
              .vfs_mount = procfs_mount,
              .vfs_start = procfs_start,
              .vfs_unmount = procfs_unmount,
              .vfs_root = procfs_root,
              .vfs_quotactl = (void *)eopnotsupp,
              .vfs_statvfs = procfs_statvfs,
              .vfs_sync = procfs_sync,
              .vfs_vget = procfs_vget,
              .vfs_loadvnode = procfs_loadvnode,
              .vfs_fhtovp = (void *)eopnotsupp,
              .vfs_vptofh = (void *)eopnotsupp,
              .vfs_init = procfs_init,
              .vfs_reinit = procfs_reinit,
              .vfs_done = procfs_done,
              .vfs_snapshot = (void *)eopnotsupp,
              .vfs_extattrctl = vfs_stdextattrctl,
              .vfs_suspendctl = genfs_suspendctl,
              .vfs_renamelock_enter = genfs_renamelock_enter,
              .vfs_renamelock_exit = genfs_renamelock_exit,
              .vfs_fsync = (void *)eopnotsupp,
              .vfs_opv_descs = procfs_vnodeopv_descs
      };
      
      static int
      procfs_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
          void *arg0, void *arg1, void *arg2, void *arg3)
      {
              struct proc *p;
              struct pfsnode *pfs;
              int result;
      
              result = KAUTH_RESULT_DEFER;
              p = arg0;
              pfs = arg1;
      
	if (action != KAUTH_PROCESS_PROCFS)
		return result;
      
              switch (pfs->pfs_type) {
              case PFSregs:
              case PFSfpregs:
              case PFSmem:
                      if (kauth_cred_getuid(cred) != kauth_cred_getuid(p->p_cred) ||
                          ISSET(p->p_flag, PK_SUGID))
                              break;
      
                      /*FALLTHROUGH*/
              default:
                      result = KAUTH_RESULT_ALLOW;
                      break;
              }
      
              return result;
      }
      
      
      static int
      procfs_modcmd(modcmd_t cmd, void *arg)
      {
              int error;
      
              switch (cmd) {
              case MODULE_CMD_INIT:
                      error = vfs_attach(&procfs_vfsops);
                      if (error != 0)
                              break;
                      sysctl_createv(&procfs_sysctl_log, 0, NULL, NULL,
                                     CTLFLAG_PERMANENT,
                                     CTLTYPE_NODE, "procfs",
                                     SYSCTL_DESCR("Process file system"),
                                     NULL, 0, NULL, 0,
                                     CTL_VFS, 12, CTL_EOL);
                      /*
                       * XXX the "12" above could be dynamic, thereby eliminating
                       * one more instance of the "number to vfs" mapping problem,
                       * but "12" is the order as taken from sys/mount.h
                       */
      
                      procfs_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
                          procfs_listener_cb, NULL);
      
                      break;
              case MODULE_CMD_FINI:
                      error = vfs_detach(&procfs_vfsops);
                      if (error != 0)
                              break;
                      sysctl_teardown(&procfs_sysctl_log);
                      kauth_unlisten_scope(procfs_listener);
                      break;
              default:
                      error = ENOTTY;
                      break;
              }
      
              return (error);
      }
      /*        $NetBSD: kern_sig.c,v 1.364 2019/06/21 04:28:12 kamil Exp $        */
      
      /*-
       * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*
       * Copyright (c) 1982, 1986, 1989, 1991, 1993
       *        The Regents of the University of California.  All rights reserved.
       * (c) UNIX System Laboratories, Inc.
       * All or some portions of this file are derived from material licensed
       * to the University of California by American Telephone and Telegraph
       * Co. or Unix System Laboratories, Inc. and are reproduced herein with
       * the permission of UNIX System Laboratories, Inc.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. Neither the name of the University nor the names of its contributors
       *    may be used to endorse or promote products derived from this software
       *    without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       *
       *        @(#)kern_sig.c        8.14 (Berkeley) 5/14/95
       */
      
      /*
       * Signal subsystem.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_sig.c,v 1.364 2019/06/21 04:28:12 kamil Exp $");
      
      #include "opt_ptrace.h"
      #include "opt_dtrace.h"
      #include "opt_compat_sunos.h"
      #include "opt_compat_netbsd.h"
      #include "opt_compat_netbsd32.h"
      #include "opt_pax.h"
      
      #define        SIGPROP                /* include signal properties table */
      #include <sys/param.h>
      #include <sys/signalvar.h>
      #include <sys/proc.h>
      #include <sys/ptrace.h>
      #include <sys/systm.h>
      #include <sys/wait.h>
      #include <sys/ktrace.h>
      #include <sys/syslog.h>
      #include <sys/filedesc.h>
      #include <sys/file.h>
      #include <sys/pool.h>
      #include <sys/ucontext.h>
      #include <sys/exec.h>
      #include <sys/kauth.h>
      #include <sys/acct.h>
      #include <sys/callout.h>
      #include <sys/atomic.h>
      #include <sys/cpu.h>
      #include <sys/module.h>
      #include <sys/sdt.h>
      
      #ifdef PAX_SEGVGUARD
      #include <sys/pax.h>
      #endif /* PAX_SEGVGUARD */
      
      #include <uvm/uvm_extern.h>
      
      #define        SIGQUEUE_MAX        32
      static pool_cache_t        sigacts_cache        __read_mostly;
      static pool_cache_t        ksiginfo_cache        __read_mostly;
      static callout_t        proc_stop_ch        __cacheline_aligned;
      
      sigset_t                contsigmask        __cacheline_aligned;
      sigset_t                stopsigmask        __cacheline_aligned;
      static sigset_t                vforksigmask        __cacheline_aligned;
      sigset_t                sigcantmask        __cacheline_aligned;
      
      static void        ksiginfo_exechook(struct proc *, void *);
      static void        proc_stop(struct proc *, int);
      static void        proc_stop_done(struct proc *, int);
      static void        proc_stop_callout(void *);
      static int        sigchecktrace(void);
      static int        sigpost(struct lwp *, sig_t, int, int);
      static int        sigput(sigpend_t *, struct proc *, ksiginfo_t *);
      static int        sigunwait(struct proc *, const ksiginfo_t *);
      
      static void        sigacts_poolpage_free(struct pool *, void *);
      static void        *sigacts_poolpage_alloc(struct pool *, int);
      
      void (*sendsig_sigcontext_vec)(const struct ksiginfo *, const sigset_t *);
      int (*coredump_vec)(struct lwp *, const char *) =
          (int (*)(struct lwp *, const char *))enosys;
      
      /*
       * DTrace SDT provider definitions
       */
      SDT_PROVIDER_DECLARE(proc);
      SDT_PROBE_DEFINE3(proc, kernel, , signal__send,
          "struct lwp *",         /* target thread */
          "struct proc *",         /* target process */
          "int");                /* signal */
      SDT_PROBE_DEFINE3(proc, kernel, , signal__discard,
          "struct lwp *",        /* target thread */
          "struct proc *",        /* target process */
          "int");                  /* signal */
      SDT_PROBE_DEFINE3(proc, kernel, , signal__handle,
          "int",                 /* signal */
          "ksiginfo_t *",         /* signal info */
          "void (*)(void)");        /* handler address */
      
      
      static struct pool_allocator sigactspool_allocator = {
              .pa_alloc = sigacts_poolpage_alloc,
              .pa_free = sigacts_poolpage_free
      };
      
      #ifdef DEBUG
      int        kern_logsigexit = 1;
      #else
      int        kern_logsigexit = 0;
      #endif
      
      static const char logcoredump[] =
          "pid %d (%s), uid %d: exited on signal %d (core dumped)\n";
      static const char lognocoredump[] =
          "pid %d (%s), uid %d: exited on signal %d (core not dumped, err = %d)\n";
      
      static kauth_listener_t signal_listener;
      
      static int
      signal_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
          void *arg0, void *arg1, void *arg2, void *arg3)
      {
              struct proc *p;
              int result, signum;
      
              result = KAUTH_RESULT_DEFER;
              p = arg0;
              signum = (int)(unsigned long)arg1;
      
        if (action != KAUTH_PROCESS_SIGNAL)
                return result;
      
              if (kauth_cred_uidmatch(cred, p->p_cred) ||
                  (signum == SIGCONT && (curproc->p_session == p->p_session)))
                      result = KAUTH_RESULT_ALLOW;
      
              return result;
      }
      
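/*
 * sigacts_ctor:
 *
 *        Pool cache constructor for sigacts structures: zero the new object.
 */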
      static int
      sigacts_ctor(void *arg __unused, void *obj, int flags __unused)
      {
              memset(obj, 0, sizeof(struct sigacts));
              return 0;
      }
      
      /*
       * signal_init:
       *
       *        Initialize global signal-related data structures.
       */
      void
      signal_init(void)
      {
      
              sigactspool_allocator.pa_pagesz = (PAGE_SIZE)*2;
      
              sigacts_cache = pool_cache_init(sizeof(struct sigacts), 0, 0, 0,
                  "sigacts", sizeof(struct sigacts) > PAGE_SIZE ?
                  &sigactspool_allocator : NULL, IPL_NONE, sigacts_ctor, NULL, NULL);
              ksiginfo_cache = pool_cache_init(sizeof(ksiginfo_t), 0, 0, 0,
                  "ksiginfo", NULL, IPL_VM, NULL, NULL, NULL);
      
              exechook_establish(ksiginfo_exechook, NULL);
      
              callout_init(&proc_stop_ch, CALLOUT_MPSAFE);
              callout_setfunc(&proc_stop_ch, proc_stop_callout, NULL);
      
              signal_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
                  signal_listener_cb, NULL);
      }
      
      /*
       * sigacts_poolpage_alloc:
       *
       *        Allocate a page for the sigacts memory pool.
       */
      static void *
      sigacts_poolpage_alloc(struct pool *pp, int flags)
      {
      
              return (void *)uvm_km_alloc(kernel_map,
                  PAGE_SIZE * 2, PAGE_SIZE * 2,
                  ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
                  | UVM_KMF_WIRED);
      }
      
      /*
       * sigacts_poolpage_free:
       *
       *        Free a page on behalf of the sigacts memory pool.
       */
      static void
      sigacts_poolpage_free(struct pool *pp, void *v)
      {
      
              uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE * 2, UVM_KMF_WIRED);
      }
      
      /*
       * sigactsinit:
       *
 *        Create an initial sigacts structure, using the same signal state
 *        as the specified process.  If 'share' is set, share the sigacts by
 *        holding a reference; otherwise just copy it from the parent.
       */
      struct sigacts *
      sigactsinit(struct proc *pp, int share)
      {
              struct sigacts *ps = pp->p_sigacts, *ps2;
      
              if (__predict_false(share)) {
                      atomic_inc_uint(&ps->sa_refcnt);
                      return ps;
              }
              ps2 = pool_cache_get(sigacts_cache, PR_WAITOK);
              mutex_init(&ps2->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
              ps2->sa_refcnt = 1;
      
              mutex_enter(&ps->sa_mutex);
              memcpy(ps2->sa_sigdesc, ps->sa_sigdesc, sizeof(ps2->sa_sigdesc));
              mutex_exit(&ps->sa_mutex);
              return ps2;
      }
      
      /*
       * sigactsunshare:
       *
       *        Make this process not share its sigacts, maintaining all signal state.
       */
      void
      sigactsunshare(struct proc *p)
      {
              struct sigacts *ps, *oldps = p->p_sigacts;
      
              if (__predict_true(oldps->sa_refcnt == 1))
                      return;
      
              ps = pool_cache_get(sigacts_cache, PR_WAITOK);
              mutex_init(&ps->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
              memcpy(ps->sa_sigdesc, oldps->sa_sigdesc, sizeof(ps->sa_sigdesc));
              ps->sa_refcnt = 1;
      
              p->p_sigacts = ps;
              sigactsfree(oldps);
      }
      
      /*
 * sigactsfree:
       *
       *        Release a sigacts structure.
       */
      void
      sigactsfree(struct sigacts *ps)
      {
      
              if (atomic_dec_uint_nv(&ps->sa_refcnt) == 0) {
                      mutex_destroy(&ps->sa_mutex);
                      pool_cache_put(sigacts_cache, ps);
              }
      }
      
      /*
       * siginit:
       *
       *        Initialize signal state for process 0; set to ignore signals that
       *        are ignored by default and disable the signal stack.  Locking not
       *        required as the system is still cold.
       */
      void
      siginit(struct proc *p)
      {
              struct lwp *l;
              struct sigacts *ps;
              int signo, prop;
      
              ps = p->p_sigacts;
              sigemptyset(&contsigmask);
              sigemptyset(&stopsigmask);
              sigemptyset(&vforksigmask);
              sigemptyset(&sigcantmask);
              for (signo = 1; signo < NSIG; signo++) {
                      prop = sigprop[signo];
                      if (prop & SA_CONT)
                              sigaddset(&contsigmask, signo);
                      if (prop & SA_STOP)
                              sigaddset(&stopsigmask, signo);
                      if (prop & SA_STOP && signo != SIGSTOP)
                              sigaddset(&vforksigmask, signo);
                      if (prop & SA_CANTMASK)
                              sigaddset(&sigcantmask, signo);
                      if (prop & SA_IGNORE && signo != SIGCONT)
                              sigaddset(&p->p_sigctx.ps_sigignore, signo);
                      sigemptyset(&SIGACTION_PS(ps, signo).sa_mask);
                      SIGACTION_PS(ps, signo).sa_flags = SA_RESTART;
              }
              sigemptyset(&p->p_sigctx.ps_sigcatch);
              p->p_sflag &= ~PS_NOCLDSTOP;
      
              ksiginfo_queue_init(&p->p_sigpend.sp_info);
              sigemptyset(&p->p_sigpend.sp_set);
      
              /*
               * Reset per LWP state.
               */
              l = LIST_FIRST(&p->p_lwps);
              l->l_sigwaited = NULL;
              l->l_sigstk = SS_INIT;
              ksiginfo_queue_init(&l->l_sigpend.sp_info);
              sigemptyset(&l->l_sigpend.sp_set);
      
              /* One reference. */
              ps->sa_refcnt = 1;
      }
      
      /*
       * execsigs:
       *
       *        Reset signals for an exec of the specified process.
       */
      void
      execsigs(struct proc *p)
      {
              struct sigacts *ps;
              struct lwp *l;
              int signo, prop;
              sigset_t tset;
              ksiginfoq_t kq;
      
              KASSERT(p->p_nlwps == 1);
      
              sigactsunshare(p);
              ps = p->p_sigacts;
      
              /*
               * Reset caught signals.  Held signals remain held through
               * l->l_sigmask (unless they were caught, and are now ignored
               * by default).
               *
               * No need to lock yet, the process has only one LWP and
               * at this point the sigacts are private to the process.
               */
              sigemptyset(&tset);
              for (signo = 1; signo < NSIG; signo++) {
                      if (sigismember(&p->p_sigctx.ps_sigcatch, signo)) {
                              prop = sigprop[signo];
                              if (prop & SA_IGNORE) {
                                      if ((prop & SA_CONT) == 0)
                                              sigaddset(&p->p_sigctx.ps_sigignore,
                                                  signo);
                                      sigaddset(&tset, signo);
                              }
                              SIGACTION_PS(ps, signo).sa_handler = SIG_DFL;
                      }
                      sigemptyset(&SIGACTION_PS(ps, signo).sa_mask);
                      SIGACTION_PS(ps, signo).sa_flags = SA_RESTART;
              }
              ksiginfo_queue_init(&kq);
      
              mutex_enter(p->p_lock);
              sigclearall(p, &tset, &kq);
              sigemptyset(&p->p_sigctx.ps_sigcatch);
      
              /*
         * Reset the "no zombies if child dies" flag, as Solaris does.
               */
              p->p_flag &= ~(PK_NOCLDWAIT | PK_CLDSIGIGN);
              if (SIGACTION_PS(ps, SIGCHLD).sa_handler == SIG_IGN)
                      SIGACTION_PS(ps, SIGCHLD).sa_handler = SIG_DFL;
      
              /*
               * Reset per-LWP state.
               */
              l = LIST_FIRST(&p->p_lwps);
              l->l_sigwaited = NULL;
              l->l_sigstk = SS_INIT;
              ksiginfo_queue_init(&l->l_sigpend.sp_info);
              sigemptyset(&l->l_sigpend.sp_set);
              mutex_exit(p->p_lock);
      
              ksiginfo_queue_drain(&kq);
      }
      
      /*
       * ksiginfo_exechook:
       *
       *        Free all pending ksiginfo entries from a process on exec.
       *        Additionally, drain any unused ksiginfo structures in the
       *        system back to the pool.
       *
       *        XXX This should not be a hook, every process has signals.
       */
      static void
      ksiginfo_exechook(struct proc *p, void *v)
      {
              ksiginfoq_t kq;
      
              ksiginfo_queue_init(&kq);
      
              mutex_enter(p->p_lock);
              sigclearall(p, NULL, &kq);
              mutex_exit(p->p_lock);
      
              ksiginfo_queue_drain(&kq);
      }
      
      /*
       * ksiginfo_alloc:
       *
       *        Allocate a new ksiginfo structure from the pool, and optionally copy
       *        an existing one.  If the existing ksiginfo_t is from the pool, and
       *        has not been queued somewhere, then just return it.  Additionally,
       *        if the existing ksiginfo_t does not contain any information beyond
       *        the signal number, then just return it.
       */
      ksiginfo_t *
      ksiginfo_alloc(struct proc *p, ksiginfo_t *ok, int flags)
      {
              ksiginfo_t *kp;
      
              if (ok != NULL) {
                      if ((ok->ksi_flags & (KSI_QUEUED | KSI_FROMPOOL)) ==
                          KSI_FROMPOOL)
                              return ok;
                      if (KSI_EMPTY_P(ok))
                              return ok;
              }
      
              kp = pool_cache_get(ksiginfo_cache, flags);
              if (kp == NULL) {
      #ifdef DIAGNOSTIC
                      printf("Out of memory allocating ksiginfo for pid %d\n",
                          p->p_pid);
      #endif
                      return NULL;
              }
      
              if (ok != NULL) {
                      memcpy(kp, ok, sizeof(*kp));
                      kp->ksi_flags &= ~KSI_QUEUED;
              } else
                      KSI_INIT_EMPTY(kp);
      
              kp->ksi_flags |= KSI_FROMPOOL;
      
              return kp;
      }
      
      /*
       * ksiginfo_free:
       *
       *        If the given ksiginfo_t is from the pool and has not been queued,
       *        then free it.
       */
      void
      ksiginfo_free(ksiginfo_t *kp)
      {
      
              if ((kp->ksi_flags & (KSI_QUEUED | KSI_FROMPOOL)) != KSI_FROMPOOL)
                      return;
              pool_cache_put(ksiginfo_cache, kp);
      }
      
      /*
       * ksiginfo_queue_drain:
       *
       *        Drain a non-empty ksiginfo_t queue.
       */
      void
      ksiginfo_queue_drain0(ksiginfoq_t *kq)
      {
              ksiginfo_t *ksi;
      
              KASSERT(!TAILQ_EMPTY(kq));
      
              while (!TAILQ_EMPTY(kq)) {
                      ksi = TAILQ_FIRST(kq);
                      TAILQ_REMOVE(kq, ksi, ksi_list);
                      pool_cache_put(ksiginfo_cache, ksi);
              }
      }
      
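/*
 * siggetinfo:
 *
 *        Remove the first queued ksiginfo matching 'signo' from the pending
 *        queue, copying it to 'out' if requested.  If nothing is queued,
 *        manufacture a bare siginfo with SI_NOINFO.  Returns the number of
 *        queued entries that matched, so the caller can tell whether more
 *        instances of the signal remain pending.
 */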
      static int
      siggetinfo(sigpend_t *sp, ksiginfo_t *out, int signo)
      {
              ksiginfo_t *ksi, *nksi;
      
              if (sp == NULL)
                      goto out;
      
              /* Find siginfo and copy it out. */
              int count = 0;
              TAILQ_FOREACH_SAFE(ksi, &sp->sp_info, ksi_list, nksi) {
                      if (ksi->ksi_signo != signo)
                              continue;
                if (count++ > 0) /* Only remove the first; count all of them. */
                        continue;
                      TAILQ_REMOVE(&sp->sp_info, ksi, ksi_list);
                      KASSERT((ksi->ksi_flags & KSI_FROMPOOL) != 0);
                      KASSERT((ksi->ksi_flags & KSI_QUEUED) != 0);
                      ksi->ksi_flags &= ~KSI_QUEUED;
                      if (out != NULL) {
                              memcpy(out, ksi, sizeof(*out));
                              out->ksi_flags &= ~(KSI_FROMPOOL | KSI_QUEUED);
                      }
                      ksiginfo_free(ksi);
              }
              if (count)
                      return count;
      
      out:
              /* If there is no siginfo, then manufacture it. */
              if (out != NULL) {
                      KSI_INIT(out);
                      out->ksi_info._signo = signo;
                      out->ksi_info._code = SI_NOINFO;
              }
              return 0;
      }
      
      /*
       * sigget:
       *
       *        Fetch the first pending signal from a set.  Optionally, also fetch
       *        or manufacture a ksiginfo element.  Returns the number of the first
       *        pending signal, or zero.
       */ 
      int
      sigget(sigpend_t *sp, ksiginfo_t *out, int signo, const sigset_t *mask)
      {
              sigset_t tset;
              int count;
      
              /* If there's no pending set, the signal is from the debugger. */
              if (sp == NULL)
                      goto out;
      
              /* Construct mask from signo, and 'mask'. */
              if (signo == 0) {
                      if (mask != NULL) {
                              tset = *mask;
                              __sigandset(&sp->sp_set, &tset);
                      } else
                              tset = sp->sp_set;
      
                      /* If there are no signals pending - return. */
                      if ((signo = firstsig(&tset)) == 0)
                              goto out;
              } else {
                      KASSERT(sigismember(&sp->sp_set, signo));
              }
      
              sigdelset(&sp->sp_set, signo);
      out:
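        /*
         * Fetch or manufacture the siginfo.  If more than one ksiginfo
         * was queued for this signal, keep the signal marked as pending.
         */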
              count = siggetinfo(sp, out, signo);
              if (count > 1)
                      sigaddset(&sp->sp_set, signo);
              return signo;
      }
      
      /*
       * sigput:
       *
       *        Append a new ksiginfo element to the list of pending ksiginfo's.
       */
      static int
      sigput(sigpend_t *sp, struct proc *p, ksiginfo_t *ksi)
      {
              ksiginfo_t *kp;
      
              KASSERT(mutex_owned(p->p_lock));
              KASSERT((ksi->ksi_flags & KSI_QUEUED) == 0);
      
              sigaddset(&sp->sp_set, ksi->ksi_signo);
      
              /*
               * If there is no siginfo, we are done.
               */
              if (KSI_EMPTY_P(ksi))
                      return 0;
      
              KASSERT((ksi->ksi_flags & KSI_FROMPOOL) != 0);
      
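        /*
         * POSIX realtime signals are always queued individually; other
         * signals coalesce into an already-queued ksiginfo carrying the
         * same signal number.
         */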
              size_t count = 0;
              TAILQ_FOREACH(kp, &sp->sp_info, ksi_list) {
                      count++;
                      if (ksi->ksi_signo >= SIGRTMIN && ksi->ksi_signo <= SIGRTMAX)
                              continue;
                      if (kp->ksi_signo == ksi->ksi_signo) {
                              KSI_COPY(ksi, kp);
                              kp->ksi_flags |= KSI_QUEUED;
                              return 0;
                      }
              }
              
              if (count >= SIGQUEUE_MAX) {
      #ifdef DIAGNOSTIC
                      printf("%s(%d): Signal queue is full signal=%d\n",
                          p->p_comm, p->p_pid, ksi->ksi_signo);
      #endif
                      return EAGAIN;
              }
              ksi->ksi_flags |= KSI_QUEUED;
              TAILQ_INSERT_TAIL(&sp->sp_info, ksi, ksi_list);
              
              return 0;
      }
      
      /*
       * sigclear:
       *
       *        Clear all pending signals in the specified set.
       */
      void
      sigclear(sigpend_t *sp, const sigset_t *mask, ksiginfoq_t *kq)
      {
              ksiginfo_t *ksi, *next;
      
              if (mask == NULL)
                      sigemptyset(&sp->sp_set);
              else
                      sigminusset(mask, &sp->sp_set);
      
              TAILQ_FOREACH_SAFE(ksi, &sp->sp_info, ksi_list, next) {
                      if (mask == NULL || sigismember(mask, ksi->ksi_signo)) {
                              TAILQ_REMOVE(&sp->sp_info, ksi, ksi_list);
                              KASSERT((ksi->ksi_flags & KSI_FROMPOOL) != 0);
                              KASSERT((ksi->ksi_flags & KSI_QUEUED) != 0);
                              TAILQ_INSERT_TAIL(kq, ksi, ksi_list);
                      }
              }
      }
      
      /*
       * sigclearall:
       *
       *        Clear all pending signals in the specified set from a process and
       *        its LWPs.
       */
      void
      sigclearall(struct proc *p, const sigset_t *mask, ksiginfoq_t *kq)
      {
              struct lwp *l;
      
              KASSERT(mutex_owned(p->p_lock));
      
              sigclear(&p->p_sigpend, mask, kq);
      
              LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                      sigclear(&l->l_sigpend, mask, kq);
              }
      }
      
      /*
       * sigispending:
       *
       *        Return the first signal number if there are pending signals for the
       *        current LWP.  May be called unlocked provided that LW_PENDSIG is set,
 *        and that the signal has been posted to the appropriate queue before
       *        LW_PENDSIG is set.
       */ 
      int
      sigispending(struct lwp *l, int signo)
      {
              struct proc *p = l->l_proc;
              sigset_t tset;
      
              membar_consumer();
      
              tset = l->l_sigpend.sp_set;
              sigplusset(&p->p_sigpend.sp_set, &tset);
              sigminusset(&p->p_sigctx.ps_sigignore, &tset);
              sigminusset(&l->l_sigmask, &tset);
      
              if (signo == 0) {
                      return firstsig(&tset);
              }
              return sigismember(&tset, signo) ? signo : 0;
      }
      
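/*
 * Fetch the user context (signal mask, stack and machine context) for
 * an LWP.  Called with p_lock held; the lock is dropped around
 * cpu_getmcontext() and re-acquired before returning.
 */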
      void
      getucontext(struct lwp *l, ucontext_t *ucp)
      {
              struct proc *p = l->l_proc;
      
              KASSERT(mutex_owned(p->p_lock));
      
              ucp->uc_flags = 0;
              ucp->uc_link = l->l_ctxlink;
              ucp->uc_sigmask = l->l_sigmask;
              ucp->uc_flags |= _UC_SIGMASK;
      
              /*
               * The (unsupplied) definition of the `current execution stack'
               * in the System V Interface Definition appears to allow returning
               * the main context stack.
               */
              if ((l->l_sigstk.ss_flags & SS_ONSTACK) == 0) {
                      ucp->uc_stack.ss_sp = (void *)l->l_proc->p_stackbase;
                      ucp->uc_stack.ss_size = ctob(l->l_proc->p_vmspace->vm_ssize);
                      ucp->uc_stack.ss_flags = 0;        /* XXX, def. is Very Fishy */
              } else {
                      /* Simply copy alternate signal execution stack. */
                      ucp->uc_stack = l->l_sigstk;
              }
              ucp->uc_flags |= _UC_STACK;
              mutex_exit(p->p_lock);
              cpu_getmcontext(l, &ucp->uc_mcontext, &ucp->uc_flags);
              mutex_enter(p->p_lock);
      }
      
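/*
 * Install a user context for an LWP: signal mask, machine context,
 * context link and alternate signal stack state.  Called with p_lock
 * held; the lock is dropped around cpu_setmcontext().
 */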
      int
      setucontext(struct lwp *l, const ucontext_t *ucp)
      {
              struct proc *p = l->l_proc;
              int error;
      
              KASSERT(mutex_owned(p->p_lock));
      
              if ((ucp->uc_flags & _UC_SIGMASK) != 0) {
                      error = sigprocmask1(l, SIG_SETMASK, &ucp->uc_sigmask, NULL);
                      if (error != 0)
                              return error;
              }
      
              mutex_exit(p->p_lock);
              error = cpu_setmcontext(l, &ucp->uc_mcontext, ucp->uc_flags);
              mutex_enter(p->p_lock);
              if (error != 0)
                        return error;
      
              l->l_ctxlink = ucp->uc_link;
      
              /*
               * If there was stack information, update whether or not we are
               * still running on an alternate signal stack.
               */
              if ((ucp->uc_flags & _UC_STACK) != 0) {
                      if (ucp->uc_stack.ss_flags & SS_ONSTACK)
                              l->l_sigstk.ss_flags |= SS_ONSTACK;
                      else
                              l->l_sigstk.ss_flags &= ~SS_ONSTACK;
              }
      
              return 0;
      }
      
      /*
       * killpg1: common code for kill process group/broadcast kill.
       */
      int
      killpg1(struct lwp *l, ksiginfo_t *ksi, int pgid, int all)
      {
              struct proc        *p, *cp;
              kauth_cred_t        pc;
              struct pgrp        *pgrp;
              int                nfound;
              int                signo = ksi->ksi_signo;
      
              cp = l->l_proc;
              pc = l->l_cred;
              nfound = 0;
      
              mutex_enter(proc_lock);
              if (all) {
                      /*
                       * Broadcast.
                       */
                      PROCLIST_FOREACH(p, &allproc) {
                              if (p->p_pid <= 1 || p == cp ||
                                  (p->p_flag & PK_SYSTEM) != 0)
                                      continue;
                              mutex_enter(p->p_lock);
                              if (kauth_authorize_process(pc,
                                  KAUTH_PROCESS_SIGNAL, p, KAUTH_ARG(signo), NULL,
                                  NULL) == 0) {
                                      nfound++;
                                      if (signo)
                                              kpsignal2(p, ksi);
                              }
                              mutex_exit(p->p_lock);
                      }
              } else {
                      if (pgid == 0)
                              /* Zero pgid means send to my process group. */
                              pgrp = cp->p_pgrp;
                      else {
                              pgrp = pgrp_find(pgid);
                              if (pgrp == NULL)
                                      goto out;
                      }
                      LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
                              if (p->p_pid <= 1 || p->p_flag & PK_SYSTEM)
                                      continue;
                              mutex_enter(p->p_lock);
                              if (kauth_authorize_process(pc, KAUTH_PROCESS_SIGNAL,
                                  p, KAUTH_ARG(signo), NULL, NULL) == 0) {
                                      nfound++;
                                      if (signo && P_ZOMBIE(p) == 0)
                                              kpsignal2(p, ksi);
                              }
                              mutex_exit(p->p_lock);
                      }
              }
      out:
              mutex_exit(proc_lock);
              return nfound ? 0 : ESRCH;
      }
      
      /*
 * Send a signal to a process group.  If checkctty is set, limit to members
       * which have a controlling terminal.
       */
      void
      pgsignal(struct pgrp *pgrp, int sig, int checkctty)
      {
              ksiginfo_t ksi;
      
              KASSERT(!cpu_intr_p());
              KASSERT(mutex_owned(proc_lock));
      
              KSI_INIT_EMPTY(&ksi);
              ksi.ksi_signo = sig;
              kpgsignal(pgrp, &ksi, NULL, checkctty);
      }
      
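/*
 * As above, but deliver a fully constructed ksiginfo.  'data' is passed
 * through to kpsignal() to identify the file descriptor associated with
 * the signal.
 */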
      void
      kpgsignal(struct pgrp *pgrp, ksiginfo_t *ksi, void *data, int checkctty)
      {
              struct proc *p;
      
              KASSERT(!cpu_intr_p());
              KASSERT(mutex_owned(proc_lock));
              KASSERT(pgrp != NULL);
      
              LIST_FOREACH(p, &pgrp->pg_members, p_pglist)
                      if (checkctty == 0 || p->p_lflag & PL_CONTROLT)
                              kpsignal(p, ksi, data);
      }
      
      /*
       * Send a signal caused by a trap to the current LWP.  If it will be caught
       * immediately, deliver it with correct code.  Otherwise, post it normally.
       */
      void
      trapsignal(struct lwp *l, ksiginfo_t *ksi)
      {
              struct proc        *p;
              struct sigacts        *ps;
              int signo = ksi->ksi_signo;
              sigset_t *mask;
              sig_t action;
      
              KASSERT(KSI_TRAP_P(ksi));
      
              ksi->ksi_lid = l->l_lid;
              p = l->l_proc;
      
              KASSERT(!cpu_intr_p());
              mutex_enter(proc_lock);
              mutex_enter(p->p_lock);
      
              /*
               * If we are exiting, demise now.
               *
               * This avoids notifying tracer and deadlocking.
               */
              if (__predict_false(ISSET(p->p_sflag, PS_WEXIT))) {
                      mutex_exit(p->p_lock);
                      mutex_exit(proc_lock);
                      lwp_exit(l);
                      panic("trapsignal");
                      /* NOTREACHED */
              }
      
              mask = &l->l_sigmask;
              ps = p->p_sigacts;
              action = SIGACTION_PS(ps, signo).sa_handler;
      
              if (ISSET(p->p_slflag, PSL_TRACED) &&
                  !(p->p_pptr == p->p_opptr && ISSET(p->p_lflag, PL_PPWAIT)) &&
                  p->p_xsig != SIGKILL &&
                  !sigismember(&p->p_sigpend.sp_set, SIGKILL)) {
                      p->p_xsig = signo;
                      p->p_sigctx.ps_faked = true;
                      p->p_sigctx.ps_lwp = ksi->ksi_lid;
                      p->p_sigctx.ps_info = ksi->ksi_info;
                      sigswitch(0, signo, false);
      
                      if (ktrpoint(KTR_PSIG)) {
                              if (p->p_emul->e_ktrpsig)
                                      p->p_emul->e_ktrpsig(signo, action, mask, ksi);
                              else
                                      ktrpsig(signo, action, mask, ksi);
                      }
                      return;
              }
      
              const bool caught = sigismember(&p->p_sigctx.ps_sigcatch, signo);
              const bool masked = sigismember(mask, signo);
              if (caught && !masked) {
                      mutex_exit(proc_lock);
                      l->l_ru.ru_nsignals++;
                      kpsendsig(l, ksi, mask);
                      mutex_exit(p->p_lock);
      
                      if (ktrpoint(KTR_PSIG)) {
                              if (p->p_emul->e_ktrpsig)
                                      p->p_emul->e_ktrpsig(signo, action, mask, ksi);
                              else
                                      ktrpsig(signo, action, mask, ksi);
                      }
                      return;
              }
      
              /*
               * If the signal is masked or ignored, then unmask it and
               * reset it to the default action so that the process or
               * its tracer will be notified.
               */
              const bool ignored = action == SIG_IGN;
              if (masked || ignored) {
                      mutex_enter(&ps->sa_mutex);
                      sigdelset(mask, signo);        
                      sigdelset(&p->p_sigctx.ps_sigcatch, signo);
                      sigdelset(&p->p_sigctx.ps_sigignore, signo);
                      sigdelset(&SIGACTION_PS(ps, signo).sa_mask, signo);
                      SIGACTION_PS(ps, signo).sa_handler = SIG_DFL;
                      mutex_exit(&ps->sa_mutex);
              }
      
              kpsignal2(p, ksi);
              mutex_exit(p->p_lock);
              mutex_exit(proc_lock);
      }
      
      /*
       * Fill in signal information and signal the parent for a child status change.
       */
      void
      child_psignal(struct proc *p, int mask)
      {
              ksiginfo_t ksi;
              struct proc *q;
              int xsig;
      
              KASSERT(mutex_owned(proc_lock));
              KASSERT(mutex_owned(p->p_lock));
      
              xsig = p->p_xsig;
      
              KSI_INIT(&ksi);
              ksi.ksi_signo = SIGCHLD;
              ksi.ksi_code = (xsig == SIGCONT ? CLD_CONTINUED : CLD_STOPPED);
              ksi.ksi_pid = p->p_pid;
              ksi.ksi_uid = kauth_cred_geteuid(p->p_cred);
              ksi.ksi_status = xsig;
              ksi.ksi_utime = p->p_stats->p_ru.ru_utime.tv_sec;
              ksi.ksi_stime = p->p_stats->p_ru.ru_stime.tv_sec;
      
              q = p->p_pptr;
      
              mutex_exit(p->p_lock);
              mutex_enter(q->p_lock);
      
              if ((q->p_sflag & mask) == 0)
                      kpsignal2(q, &ksi);
      
              mutex_exit(q->p_lock);
              mutex_enter(p->p_lock);
      }
      
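/*
 * Send the signal 'signo' to process p.  Must be called with proc_lock
 * held and never from interrupt context.
 */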
      void
      psignal(struct proc *p, int signo)
      {
              ksiginfo_t ksi;
      
              KASSERT(!cpu_intr_p());
              KASSERT(mutex_owned(proc_lock));
      
              KSI_INIT_EMPTY(&ksi);
              ksi.ksi_signo = signo;
              mutex_enter(p->p_lock);
              kpsignal2(p, &ksi);
              mutex_exit(p->p_lock);
      }
      
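/*
 * Send a signal described by 'ksi' to process p.  If 'data' is supplied
 * and the process is not exiting, record in ksi_fd the descriptor whose
 * open file has 'data' as its f_data.
 */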
      void
      kpsignal(struct proc *p, ksiginfo_t *ksi, void *data)
      {
              fdfile_t *ff;
              file_t *fp;
              fdtab_t *dt;
      
              KASSERT(!cpu_intr_p());
              KASSERT(mutex_owned(proc_lock));
      
              if ((p->p_sflag & PS_WEXIT) == 0 && data) {
                      size_t fd;
                      filedesc_t *fdp = p->p_fd;
      
                      /* XXXSMP locking */
                      ksi->ksi_fd = -1;
                      dt = fdp->fd_dt;
                      for (fd = 0; fd < dt->dt_nfiles; fd++) {
                              if ((ff = dt->dt_ff[fd]) == NULL)
                                      continue;
                              if ((fp = ff->ff_file) == NULL)
                                      continue;
                              if (fp->f_data == data) {
                                      ksi->ksi_fd = fd;
                                      break;
                              }
                      }
              }
              mutex_enter(p->p_lock);
              kpsignal2(p, ksi);
              mutex_exit(p->p_lock);
      }
      
      /*
       * sigismasked:
       *
       *        Returns true if signal is ignored or masked for the specified LWP.
       */
      int
      sigismasked(struct lwp *l, int sig)
      {
              struct proc *p = l->l_proc;
      
              return sigismember(&p->p_sigctx.ps_sigignore, sig) ||
                  sigismember(&l->l_sigmask, sig);
      }
      
      /*
       * sigpost:
       *
       *        Post a pending signal to an LWP.  Returns non-zero if the LWP may
       *        be able to take the signal.
       */
      static int
      sigpost(struct lwp *l, sig_t action, int prop, int sig)
      {
              int rv, masked;
              struct proc *p = l->l_proc;
      
              KASSERT(mutex_owned(p->p_lock));
      
              /*
               * If the LWP is on the way out, sigclear() will be busy draining all
               * pending signals.  Don't give it more.
               */
              if (l->l_refcnt == 0)
                      return 0;
      
              SDT_PROBE(proc, kernel, , signal__send, l, p, sig, 0, 0);
      
              /*
               * Have the LWP check for signals.  This ensures that even if no LWP
               * is found to take the signal immediately, it should be taken soon.
               */
              lwp_lock(l);
              l->l_flag |= LW_PENDSIG;
      
              /*
               * SIGCONT can be masked, but if LWP is stopped, it needs restart.
               * Note: SIGKILL and SIGSTOP cannot be masked.
               */
              masked = sigismember(&l->l_sigmask, sig);
              if (masked && ((prop & SA_CONT) == 0 || l->l_stat != LSSTOP)) {
                      lwp_unlock(l);
                      return 0;
              }
      
              /*
               * If killing the process, make it run fast.
               */
              if (__predict_false((prop & SA_KILL) != 0) &&
                  action == SIG_DFL && l->l_priority < MAXPRI_USER) {
                      KASSERT(l->l_class == SCHED_OTHER);
                      lwp_changepri(l, MAXPRI_USER);
              }
      
              /*
               * If the LWP is running or on a run queue, then we win.  If it's
         * sleeping interruptibly, wake it and make it take the signal.  If
         * the sleep isn't interruptible, then the chances are it will get
               * to see the signal soon anyhow.  If suspended, it can't take the
               * signal right now.  If it's LWP private or for all LWPs, save it
               * for later; otherwise punt.
               */
              rv = 0;
      
              switch (l->l_stat) {
              case LSRUN:
              case LSONPROC:
                      lwp_need_userret(l);
                      rv = 1;
                      break;
      
              case LSSLEEP:
                      if ((l->l_flag & LW_SINTR) != 0) {
                              /* setrunnable() will release the lock. */
                              setrunnable(l);
                              return 1;
                      }
                      break;
      
              case LSSUSPENDED:
                      if ((prop & SA_KILL) != 0 && (l->l_flag & LW_WCORE) != 0) {
                              /* lwp_continue() will release the lock. */
                              lwp_continue(l);
                              return 1;
                      }
                      break;
      
              case LSSTOP:
                      if ((prop & SA_STOP) != 0)
                              break;
      
                      /*
                       * If the LWP is stopped and we are sending a continue
                       * signal, then start it again.
                       */
                      if ((prop & SA_CONT) != 0) {
                              if (l->l_wchan != NULL) {
                                      l->l_stat = LSSLEEP;
                                      p->p_nrlwps++;
                                      rv = 1;
                                      break;
                              }
                              /* setrunnable() will release the lock. */
                              setrunnable(l);
                              return 1;
                      } else if (l->l_wchan == NULL || (l->l_flag & LW_SINTR) != 0) {
                              /* setrunnable() will release the lock. */
                              setrunnable(l);
                              return 1;
                      }
                      break;
      
              default:
                      break;
              }
      
              lwp_unlock(l);
              return rv;
      }
      
      /*
       * Notify an LWP that it has a pending signal.
       */
      void
      signotify(struct lwp *l)
      {
              KASSERT(lwp_locked(l, NULL));
      
              l->l_flag |= LW_PENDSIG;
              lwp_need_userret(l);
      }
      
      /*
       * Find an LWP within process p that is waiting on signal ksi, and hand
       * it on.
       */
      static int
      sigunwait(struct proc *p, const ksiginfo_t *ksi)
      {
              struct lwp *l;
              int signo;
      
              KASSERT(mutex_owned(p->p_lock));
      
              signo = ksi->ksi_signo;
      
              if (ksi->ksi_lid != 0) {
                      /*
                       * Signal came via _lwp_kill().  Find the LWP and see if
                       * it's interested.
                       */
                      if ((l = lwp_find(p, ksi->ksi_lid)) == NULL)
                              return 0;
                      if (l->l_sigwaited == NULL ||
                          !sigismember(&l->l_sigwaitset, signo))
                              return 0;
              } else {
                      /*
                       * Look for any LWP that may be interested.
                       */
                      LIST_FOREACH(l, &p->p_sigwaiters, l_sigwaiter) {
                              KASSERT(l->l_sigwaited != NULL);
                              if (sigismember(&l->l_sigwaitset, signo))
                                      break;
                      }
              }
      
              if (l != NULL) {
                      l->l_sigwaited->ksi_info = ksi->ksi_info;
                      l->l_sigwaited = NULL;
                      LIST_REMOVE(l, l_sigwaiter);
                      cv_signal(&l->l_sigcv);
                      return 1;
              }
      
              return 0;
      }
      
      /*
       * Send the signal to the process.  If the signal has an action, the action
       * is usually performed by the target process rather than the caller; we add
       * the signal to the set of pending signals for the process.
       *
       * Exceptions:
       *   o When a stop signal is sent to a sleeping process that takes the
       *     default action, the process is stopped without awakening it.
       *   o SIGCONT restarts stopped processes (or puts them back to sleep)
 *     regardless of the signal action (e.g., blocked or ignored).
       *
       * Other ignored signals are discarded immediately.
       */
      int
      kpsignal2(struct proc *p, ksiginfo_t *ksi)
      {
              int prop, signo = ksi->ksi_signo;
              struct lwp *l = NULL;
              ksiginfo_t *kp;
              lwpid_t lid;
              sig_t action;
              bool toall;
              int error = 0;
      
              KASSERT(!cpu_intr_p());
              KASSERT(mutex_owned(proc_lock));
              KASSERT(mutex_owned(p->p_lock));
              KASSERT((ksi->ksi_flags & KSI_QUEUED) == 0);
              KASSERT(signo > 0 && signo < NSIG);
      
              /*
               * If the process is being created by fork, is a zombie or is
               * exiting, then just drop the signal here and bail out.
               */
              if (p->p_stat != SACTIVE && p->p_stat != SSTOP)
                      return 0;
      
              /* XXX for core dump/debugger */
              p->p_sigctx.ps_lwp = ksi->ksi_lid;
              p->p_sigctx.ps_info = ksi->ksi_info;
      
              /*
               * Notify any interested parties of the signal.
               */
              KNOTE(&p->p_klist, NOTE_SIGNAL | signo);
      
              /*
               * Some signals including SIGKILL must act on the entire process.
               */
              kp = NULL;
              prop = sigprop[signo];
              toall = ((prop & SA_TOALL) != 0);
              lid = toall ? 0 : ksi->ksi_lid;
      
              /*
               * If proc is traced, always give parent a chance.
               */
              if (p->p_slflag & PSL_TRACED) {
                      action = SIG_DFL;
      
                      if (lid == 0) {
                              /*
                               * If the process is being traced and the signal
                               * is being caught, make sure to save any ksiginfo.
                               */
                              if ((kp = ksiginfo_alloc(p, ksi, PR_NOWAIT)) == NULL)
                                      goto discard;
                              if ((error = sigput(&p->p_sigpend, p, kp)) != 0)
                                      goto out;
                      }
              } else {
      
                      /*
                       * If the signal is being ignored, then drop it.  Note: we
                       * don't set SIGCONT in ps_sigignore, and if it is set to
                       * SIG_IGN, action will be SIG_DFL here.
                       */
                      if (sigismember(&p->p_sigctx.ps_sigignore, signo))
                              goto discard;
      
                      else if (sigismember(&p->p_sigctx.ps_sigcatch, signo))
                              action = SIG_CATCH;
                      else {
                              action = SIG_DFL;
      
                              /*
                               * If sending a tty stop signal to a member of an
                               * orphaned process group, discard the signal here if
                               * the action is default; don't stop the process below
                               * if sleeping, and don't clear any pending SIGCONT.
                               */
                              if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0)
                                      goto discard;
      
                              if (prop & SA_KILL && p->p_nice > NZERO)
                                      p->p_nice = NZERO;
                      }
              }
      
              /*
               * If stopping or continuing a process, discard any pending
               * signals that would do the inverse.
               */
              if ((prop & (SA_CONT | SA_STOP)) != 0) {
                      ksiginfoq_t kq;
      
                      ksiginfo_queue_init(&kq);
                      if ((prop & SA_CONT) != 0)
                              sigclear(&p->p_sigpend, &stopsigmask, &kq);
                      if ((prop & SA_STOP) != 0)
                              sigclear(&p->p_sigpend, &contsigmask, &kq);
                      ksiginfo_queue_drain(&kq);        /* XXXSMP */
              }
      
              /*
               * If the signal doesn't have SA_CANTMASK (no override for SIGKILL,
               * please!), check if any LWPs are waiting on it.  If yes, pass on
               * the signal info.  The signal won't be processed further here.
               */
              if ((prop & SA_CANTMASK) == 0 && !LIST_EMPTY(&p->p_sigwaiters) &&
                  p->p_stat == SACTIVE && (p->p_sflag & PS_STOPPING) == 0 &&
                  sigunwait(p, ksi))
                      goto discard;
      
              /*
               * XXXSMP Should be allocated by the caller, we're holding locks
               * here.
               */
              if (kp == NULL && (kp = ksiginfo_alloc(p, ksi, PR_NOWAIT)) == NULL)
                      goto discard;
      
              /*
               * LWP private signals are easy - just find the LWP and post
               * the signal to it.
               */
              if (lid != 0) {
                      l = lwp_find(p, lid);
                      if (l != NULL) {
                              if ((error = sigput(&l->l_sigpend, p, kp)) != 0)
                                      goto out;
                              membar_producer();
                              if (sigpost(l, action, prop, kp->ksi_signo) != 0)
                                      signo = -1;
                      }
                      goto out;
              }
      
              /*
               * Some signals go to all LWPs, even if posted with _lwp_kill()
               * or for an SA process.
               */
              if (p->p_stat == SACTIVE && (p->p_sflag & PS_STOPPING) == 0) {
                      if ((p->p_slflag & PSL_TRACED) != 0)
                              goto deliver;
      
                      /*
                       * If SIGCONT is default (or ignored) and process is
                       * asleep, we are finished; the process should not
                       * be awakened.
                       */
                      if ((prop & SA_CONT) != 0 && action == SIG_DFL)
                              goto out;
              } else {
                      /*
                       * Process is stopped or stopping.
                       * - If traced, then no action is needed, unless killing.
                       * - Run the process only if sending SIGCONT or SIGKILL.
                       */
                      if ((p->p_slflag & PSL_TRACED) != 0 && signo != SIGKILL) {
                              goto out;
                      }
                      if ((prop & SA_CONT) != 0 || signo == SIGKILL) {
                              /*
                               * Re-adjust p_nstopchild if the process was
                               * stopped but not yet collected by its parent.
                               */
                              if (p->p_stat == SSTOP && !p->p_waited)
                                      p->p_pptr->p_nstopchild--;
                              p->p_stat = SACTIVE;
                              p->p_sflag &= ~PS_STOPPING;
                              if (p->p_slflag & PSL_TRACED) {
                                      KASSERT(signo == SIGKILL);
                                      goto deliver;
                              }
                              /*
                               * Do not make signal pending if SIGCONT is default.
                               *
                               * If the process catches SIGCONT, let it handle the
                               * signal itself (if waiting on event - process runs,
                               * otherwise continues sleeping).
                               */
                              if ((prop & SA_CONT) != 0) {
                                      p->p_xsig = SIGCONT;
                                      p->p_sflag |= PS_CONTINUED;
                                      child_psignal(p, 0);
                                      if (action == SIG_DFL) {
                                              KASSERT(signo != SIGKILL);
                                              goto deliver;
                                      }
                              }
                      } else if ((prop & SA_STOP) != 0) {
                              /*
                               * Already stopped, don't need to stop again.
                               * (If we did the shell could get confused.)
                               */
                              goto out;
                      }
              }
              /*
               * Make signal pending.
               */
              KASSERT((p->p_slflag & PSL_TRACED) == 0);
              if ((error = sigput(&p->p_sigpend, p, kp)) != 0)
                      goto out;
      deliver:
              /*
               * Before we set LW_PENDSIG on any LWP, ensure that the signal is
               * visible on the per process list (for sigispending()).  This
               * is unlikely to be needed in practice, but...
               */
              membar_producer();
      
              /*
               * Try to find an LWP that can take the signal.
               */
              LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                      if (sigpost(l, action, prop, kp->ksi_signo) && !toall)
                              break;
              }
              signo = -1;
      out:
              /*
               * If the ksiginfo wasn't used, then bin it.  XXXSMP freeing memory
               * with locks held.  The caller should take care of this.
               */
              ksiginfo_free(kp);
              if (signo == -1)
                      return error;
      discard:
              SDT_PROBE(proc, kernel, , signal__discard, l, p, signo, 0, 0);
              return error;
      }
      
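/*
 * Deliver a signal to an LWP via the emulation-specific sendsig routine.
 */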
      void
      kpsendsig(struct lwp *l, const ksiginfo_t *ksi, const sigset_t *mask)
      {
              struct proc *p = l->l_proc;
      
              KASSERT(mutex_owned(p->p_lock));
              (*p->p_emul->e_sendsig)(ksi, mask);
      }
      
      /*
 * Stop any LWPs sleeping interruptibly.
       */
      static void
      proc_stop_lwps(struct proc *p)
      {
              struct lwp *l;
      
              KASSERT(mutex_owned(p->p_lock));
              KASSERT((p->p_sflag & PS_STOPPING) != 0);
      
              LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                      lwp_lock(l);
                      if (l->l_stat == LSSLEEP && (l->l_flag & LW_SINTR) != 0) {
                              l->l_stat = LSSTOP;
                              p->p_nrlwps--;
                      }
                      lwp_unlock(l);
              }
      }
      
      /*
 * Finish stopping the process.  Mark it stopped and notify the parent.
 *
 * Note that p_lock is dropped briefly by child_psignal().
       */
      static void
      proc_stop_done(struct proc *p, int ppmask)
      {
      
              KASSERT(mutex_owned(proc_lock));
              KASSERT(mutex_owned(p->p_lock));
              KASSERT((p->p_sflag & PS_STOPPING) != 0);
              KASSERT(p->p_nrlwps == 0 || (p->p_nrlwps == 1 && p == curproc));
      
              p->p_sflag &= ~PS_STOPPING;
              p->p_stat = SSTOP;
              p->p_waited = 0;
              p->p_pptr->p_nstopchild++;
      
              /* child_psignal drops p_lock briefly. */
              child_psignal(p, ppmask);
              cv_broadcast(&p->p_pptr->p_waitcv);
      }
      
      /*
 * Stop the current process and switch away, notifying the debugger of an
 * event specific to traced processes only.
       */
      void
      eventswitch(int code)
      {
              struct lwp *l = curlwp;
              struct proc *p = l->l_proc;
              struct sigacts *ps;
              sigset_t *mask;
              sig_t action;
              ksiginfo_t ksi;
              const int signo = SIGTRAP;
      
              KASSERT(mutex_owned(proc_lock));
              KASSERT(mutex_owned(p->p_lock));
              KASSERT(p->p_pptr != initproc);
              KASSERT(l->l_stat == LSONPROC);
              KASSERT(ISSET(p->p_slflag, PSL_TRACED));
              KASSERT(!ISSET(l->l_flag, LW_SYSTEM));
              KASSERT(p->p_nrlwps > 0);
              KASSERT((code == TRAP_CHLD) || (code == TRAP_LWP) ||
                      (code == TRAP_EXEC));
      
              /*
               * If we are exiting, demise now.
               *
               * This avoids notifying tracer and deadlocking.
         */
              if (__predict_false(ISSET(p->p_sflag, PS_WEXIT))) {
                      mutex_exit(p->p_lock);
                      mutex_exit(proc_lock);
                      lwp_exit(l);
                      panic("eventswitch");
                      /* NOTREACHED */
              }
      
              /*
         * If there's a pending SIGKILL, process it immediately.
               */
              if (p->p_xsig == SIGKILL ||
                  sigismember(&p->p_sigpend.sp_set, SIGKILL)) {
                      mutex_exit(p->p_lock);
                      mutex_exit(proc_lock);
                      return;
              }
      
              KSI_INIT_TRAP(&ksi);
              ksi.ksi_lid = l->l_lid;
              ksi.ksi_info._signo = signo;
              ksi.ksi_info._code = code;
      
              /* Needed for ktrace */
              ps = p->p_sigacts;
              action = SIGACTION_PS(ps, signo).sa_handler;
              mask = &l->l_sigmask;
      
              p->p_xsig = signo;
              p->p_sigctx.ps_faked = true;
              p->p_sigctx.ps_lwp = ksi.ksi_lid;
              p->p_sigctx.ps_info = ksi.ksi_info;
      
              sigswitch(0, signo, false);
      
              if (code == TRAP_CHLD) {
                      mutex_enter(proc_lock);
                      while (l->l_vforkwaiting)
                              cv_wait(&l->l_waitcv, proc_lock);
                      mutex_exit(proc_lock);
              }
      
              if (ktrpoint(KTR_PSIG)) {
                      if (p->p_emul->e_ktrpsig)
                              p->p_emul->e_ktrpsig(signo, action, mask, &ksi);
                      else
                              ktrpsig(signo, action, mask, &ksi);
              }
      }
      
      /*
       * Stop the current process and switch away when being stopped or traced.
       */
      void
      sigswitch(int ppmask, int signo, bool relock)
      {
              struct lwp *l = curlwp;
              struct proc *p = l->l_proc;
              int biglocks;
      
              KASSERT(mutex_owned(p->p_lock));
              KASSERT(l->l_stat == LSONPROC);
              KASSERT(p->p_nrlwps > 0);
      
              /*
               * If we are exiting, demise now.
               *
               * This avoids notifying tracer and deadlocking.
               */
              if (__predict_false(ISSET(p->p_sflag, PS_WEXIT))) {
                      mutex_exit(p->p_lock);
                      if (!relock) {
                              mutex_exit(proc_lock);
                      }
                      lwp_exit(l);
                      panic("sigswitch");
                      /* NOTREACHED */
              }
      
              /*
               * On entry we know that the process needs to stop.  If it's
               * the result of a 'sideways' stop signal that has been sourced
               * through issignal(), then stop other LWPs in the process too.
               */
              if (p->p_stat == SACTIVE && (p->p_sflag & PS_STOPPING) == 0) {
                      KASSERT(signo != 0);
                      proc_stop(p, signo);
                      KASSERT(p->p_nrlwps > 0);
              }
      
              /*
               * If we are the last live LWP, and the stop was a result of
               * a new signal, then signal the parent.
               */
              if ((p->p_sflag & PS_STOPPING) != 0) {
                      if (relock && !mutex_tryenter(proc_lock)) {
                              mutex_exit(p->p_lock);
                              mutex_enter(proc_lock);
                              mutex_enter(p->p_lock);
                      }
      
                      if (p->p_nrlwps == 1 && (p->p_sflag & PS_STOPPING) != 0) {
                              /*
                               * Note that proc_stop_done() can drop
                               * p->p_lock briefly.
                               */
                              proc_stop_done(p, ppmask);
                      }
      
                      mutex_exit(proc_lock);
              }
      
              /*
               * Unlock and switch away.
               */
              KERNEL_UNLOCK_ALL(l, &biglocks);
              if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) {
                      p->p_nrlwps--;
                      lwp_lock(l);
                      KASSERT(l->l_stat == LSONPROC || l->l_stat == LSSLEEP);
                      l->l_stat = LSSTOP;
                      lwp_unlock(l);
              }
      
              mutex_exit(p->p_lock);
              lwp_lock(l);
              mi_switch(l);
              KERNEL_LOCK(biglocks, l);
      }
      
      /*
       * Check for a signal from the debugger.
       */
      static int
      sigchecktrace(void)
      {
              struct lwp *l = curlwp;
              struct proc *p = l->l_proc;
              int signo;
      
              KASSERT(mutex_owned(p->p_lock));
      
              /* If there's a pending SIGKILL, process it immediately. */
              if (sigismember(&p->p_sigpend.sp_set, SIGKILL))
                      return 0;
      
              /*
               * If we are no longer being traced, or the parent didn't
               * give us a signal, or we're stopping, look for more signals.
               */
              if ((p->p_slflag & PSL_TRACED) == 0 || p->p_xsig == 0 ||
                  (p->p_sflag & PS_STOPPING) != 0)
                      return 0;
      
              /*
               * If the new signal is being masked, look for other signals.
               * `p->p_sigctx.ps_siglist |= mask' is done in setrunnable().
               */
              signo = p->p_xsig;
              p->p_xsig = 0;
              if (sigismember(&l->l_sigmask, signo)) {
                      signo = 0;
              }
              return signo;
      }
      
      /*
       * If the current process has received a signal (should be caught or cause
       * termination, should interrupt current syscall), return the signal number.
       *
       * Stop signals with default action are processed immediately, then cleared;
       * they aren't returned.  This is checked after each entry to the system for
       * a syscall or trap.
       *
        * Note that if the process is exiting, the current LWP may be made to
        * exit as well (via sigswitch() and lwp_exit()), in which case this
        * function does not return at all.
       */
      int
      issignal(struct lwp *l)
      {
              struct proc *p;
              int signo, prop;
              sigpend_t *sp;
              sigset_t ss;
      
              p = l->l_proc;
              sp = NULL;
              signo = 0;
      
              KASSERT(p == curproc);
              KASSERT(mutex_owned(p->p_lock));
      
              for (;;) {
                      /* Discard any signals that we have decided not to take. */
                      if (signo != 0) {
                              (void)sigget(sp, NULL, signo, NULL);
                      }
      
                      /*
                       * If the process is stopped/stopping, then stop ourselves
                       * now that we're on the kernel/userspace boundary.  When
                       * we awaken, check for a signal from the debugger.
                       */
                      if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) {
                              sigswitch(PS_NOCLDSTOP, 0, true);
                              mutex_enter(p->p_lock);
                              signo = sigchecktrace();
                      } else if (p->p_stat == SACTIVE)
                              signo = sigchecktrace();
                      else
                              signo = 0;
      
                      /* Signals from the debugger are "out of band". */
                      sp = NULL;
      
                      /*
                       * If the debugger didn't provide a signal, find a pending
                       * signal from our set.  Check per-LWP signals first, and
                       * then per-process.
                       */
                      if (signo == 0) {
                              sp = &l->l_sigpend;
                              ss = sp->sp_set;
                              if ((p->p_lflag & PL_PPWAIT) != 0)
                                      sigminusset(&vforksigmask, &ss);
                              sigminusset(&l->l_sigmask, &ss);
      
                              if ((signo = firstsig(&ss)) == 0) {
                                      sp = &p->p_sigpend;
                                      ss = sp->sp_set;
                                      if ((p->p_lflag & PL_PPWAIT) != 0)
                                              sigminusset(&vforksigmask, &ss);
                                      sigminusset(&l->l_sigmask, &ss);
      
                                      if ((signo = firstsig(&ss)) == 0) {
                                              /*
                                               * No signal pending - clear the
                                               * indicator and bail out.
                                               */
                                              lwp_lock(l);
                                              l->l_flag &= ~LW_PENDSIG;
                                              lwp_unlock(l);
                                              sp = NULL;
                                              break;
                                      }
                              }
                      }
      
                      /*
                       * We should see pending but ignored signals only if
                       * we are being traced.
                       */
                      if (sigismember(&p->p_sigctx.ps_sigignore, signo) &&
                          (p->p_slflag & PSL_TRACED) == 0) {
                              /* Discard the signal. */
                              continue;
                      }
      
                      /*
                       * If traced, always stop, and stay stopped until released
                        * by the debugger.  If our parent is our debugger waiting
                       * for us and we vforked, don't hang as we could deadlock.
                       */
                      if (ISSET(p->p_slflag, PSL_TRACED) && signo != SIGKILL &&
                          !(ISSET(p->p_lflag, PL_PPWAIT) &&
                           (p->p_pptr == p->p_opptr))) {
                              /*
                               * Take the signal, but don't remove it from the
                               * siginfo queue, because the debugger can send
                               * it later.
                               */
                              if (sp)
                                      sigdelset(&sp->sp_set, signo);
                              p->p_xsig = signo;
      
                        /* Stop and notify the debugger of the signal. */
                              sigswitch(0, signo, true);
                              mutex_enter(p->p_lock);
      
                              /* Check for a signal from the debugger. */
                              if ((signo = sigchecktrace()) == 0)
                                      continue;
      
                              /* Signals from the debugger are "out of band". */
                              sp = NULL;
                      }
      
                      prop = sigprop[signo];
      
                      /*
                       * Decide whether the signal should be returned.
                       */
                      switch ((long)SIGACTION(p, signo).sa_handler) {
                      case (long)SIG_DFL:
                              /*
                               * Don't take default actions on system processes.
                               */
                              if (p->p_pid <= 1) {
      #ifdef DIAGNOSTIC
                                      /*
                                       * Are you sure you want to ignore SIGSEGV
                                       * in init? XXX
                                       */
                                      printf_nolog("Process (pid %d) got sig %d\n",
                                          p->p_pid, signo);
      #endif
                                      continue;
                              }
      
                        /*
                         * If there is a pending stop signal to process with
                         * default action, stop here, then clear the signal.
                         * However, if the process is a member of an orphaned
                         * process group, ignore tty stop signals.
                         */
                              if (prop & SA_STOP) {
                                      /*
                                       * XXX Don't hold proc_lock for p_lflag,
                                       * but it's not a big deal.
                                       */
                                      if ((ISSET(p->p_slflag, PSL_TRACED) &&
                                           !(ISSET(p->p_lflag, PL_PPWAIT) &&
                                           (p->p_pptr == p->p_opptr))) ||
                                          ((p->p_lflag & PL_ORPHANPG) != 0 &&
                                          prop & SA_TTYSTOP)) {
                                              /* Ignore the signal. */
                                              continue;
                                      }
                                      /* Take the signal. */
                                      (void)sigget(sp, NULL, signo, NULL);
                                      p->p_xsig = signo;
                                      p->p_sflag &= ~PS_CONTINUED;
                                      signo = 0;
                                      sigswitch(PS_NOCLDSTOP, p->p_xsig, true);
                                      mutex_enter(p->p_lock);
                              } else if (prop & SA_IGNORE) {
                                      /*
                                       * Except for SIGCONT, shouldn't get here.
                                       * Default action is to ignore; drop it.
                                       */
                                      continue;
                              }
                              break;
      
                      case (long)SIG_IGN:
      #ifdef DEBUG_ISSIGNAL
                               /*
                                * Masking above should prevent us from ever trying
                                * to take action on an ignored signal other
                                * than SIGCONT, unless the process is traced.
                                */
                              if ((prop & SA_CONT) == 0 &&
                                  (p->p_slflag & PSL_TRACED) == 0)
                                      printf_nolog("issignal\n");
      #endif
                              continue;
      
                      default:
                              /*
                               * This signal has an action, let postsig() process
                               * it.
                               */
                              break;
                      }
      
                      break;
              }
      
              l->l_sigpendset = sp;
              return signo;
      }
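
       /*
        * Illustrative sketch (not part of this file): a typical caller, such
        * as lwp_userret() or machine-dependent trap/syscall return code,
        * drains pending signals roughly as follows, holding p->p_lock across
        * the loop so issignal() and postsig() see a consistent signal state:
        *
        *        mutex_enter(p->p_lock);
        *        while ((signo = issignal(l)) != 0)
        *                postsig(signo);
        *        mutex_exit(p->p_lock);
        */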
      
      /*
       * Take the action for the specified signal
       * from the current set of pending signals.
       */
      void
      postsig(int signo)
      {
              struct lwp        *l;
              struct proc        *p;
              struct sigacts        *ps;
              sig_t                action;
              sigset_t        *returnmask;
              ksiginfo_t        ksi;
      
              l = curlwp;
              p = l->l_proc;
              ps = p->p_sigacts;
      
              KASSERT(mutex_owned(p->p_lock));
              KASSERT(signo > 0);
      
              /*
               * Set the new mask value and also defer further occurrences of this
               * signal.
               *
               * Special case: user has done a sigsuspend.  Here the current mask is
               * not of interest, but rather the mask from before the sigsuspend is
               * what we want restored after the signal processing is completed.
               */
              if (l->l_sigrestore) {
                      returnmask = &l->l_sigoldmask;
                      l->l_sigrestore = 0;
              } else
                      returnmask = &l->l_sigmask;
      
              /*
               * Commit to taking the signal before releasing the mutex.
               */
              action = SIGACTION_PS(ps, signo).sa_handler;
              l->l_ru.ru_nsignals++;
              if (l->l_sigpendset == NULL) {
                      /* From the debugger */
                      if (p->p_sigctx.ps_faked &&
                          signo == p->p_sigctx.ps_info._signo) {
                              KSI_INIT(&ksi);
                              ksi.ksi_info = p->p_sigctx.ps_info;
                              ksi.ksi_lid = p->p_sigctx.ps_lwp;
                              p->p_sigctx.ps_faked = false;
                      } else {
                              if (!siggetinfo(&l->l_sigpend, &ksi, signo))
                                      (void)siggetinfo(&p->p_sigpend, &ksi, signo);
                      }
              } else
                      sigget(l->l_sigpendset, &ksi, signo, NULL);
      
              if (ktrpoint(KTR_PSIG)) {
                      mutex_exit(p->p_lock);
                      if (p->p_emul->e_ktrpsig)
                              p->p_emul->e_ktrpsig(signo, action,
                                  returnmask, &ksi);
                      else
                              ktrpsig(signo, action, returnmask, &ksi);
                      mutex_enter(p->p_lock);
              }
      
              SDT_PROBE(proc, kernel, , signal__handle, signo, &ksi, action, 0, 0);
      
              if (action == SIG_DFL) {
                      /*
                       * Default action, where the default is to kill
                       * the process.  (Other cases were ignored above.)
                       */
                      sigexit(l, signo);
                      return;
              }
      
              /*
               * If we get here, the signal must be caught.
               */
      #ifdef DIAGNOSTIC
              if (action == SIG_IGN || sigismember(&l->l_sigmask, signo))
                      panic("postsig action");
      #endif
      
              kpsendsig(l, &ksi, returnmask);
      }
      
      /*
       * sendsig:
       *
       *        Default signal delivery method for NetBSD.
       */
      void
      sendsig(const struct ksiginfo *ksi, const sigset_t *mask)
      {
              struct sigacts *sa;
              int sig;
      
              sig = ksi->ksi_signo;
              sa = curproc->p_sigacts;
      
              switch (sa->sa_sigdesc[sig].sd_vers)  {
              case 0:
              case 1:
                      /* Compat for 1.6 and earlier. */
                      if (sendsig_sigcontext_vec == NULL) {
                              break;
                      }
                      (*sendsig_sigcontext_vec)(ksi, mask);
                      return;
              case 2:
              case 3:
                      sendsig_siginfo(ksi, mask);
                      return;
              default:
                      break;
              }
      
              printf("sendsig: bad version %d\n", sa->sa_sigdesc[sig].sd_vers);
              sigexit(curlwp, SIGILL);
      }
      
      /*
       * sendsig_reset:
       *
       *        Reset the signal action.  Called from emulation specific sendsig()
       *        before unlocking to deliver the signal.
       */
      void
      sendsig_reset(struct lwp *l, int signo)
      {
              struct proc *p = l->l_proc;
              struct sigacts *ps = p->p_sigacts;
      
              KASSERT(mutex_owned(p->p_lock));
      
              p->p_sigctx.ps_lwp = 0;
              memset(&p->p_sigctx.ps_info, 0, sizeof(p->p_sigctx.ps_info));
      
              mutex_enter(&ps->sa_mutex);
              sigplusset(&SIGACTION_PS(ps, signo).sa_mask, &l->l_sigmask);
              if (SIGACTION_PS(ps, signo).sa_flags & SA_RESETHAND) {
                      sigdelset(&p->p_sigctx.ps_sigcatch, signo);
                      if (signo != SIGCONT && sigprop[signo] & SA_IGNORE)
                              sigaddset(&p->p_sigctx.ps_sigignore, signo);
                      SIGACTION_PS(ps, signo).sa_handler = SIG_DFL;
              }
              mutex_exit(&ps->sa_mutex);
      }
      
      /*
       * Kill the current process for stated reason.
       */
      void
      killproc(struct proc *p, const char *why)
      {
      
              KASSERT(mutex_owned(proc_lock));
      
              log(LOG_ERR, "pid %d was killed: %s\n", p->p_pid, why);
              uprintf_locked("sorry, pid %d was killed: %s\n", p->p_pid, why);
              psignal(p, SIGKILL);
      }
      
      /*
       * Force the current process to exit with the specified signal, dumping core
       * if appropriate.  We bypass the normal tests for masked and caught
       * signals, allowing unrecoverable failures to terminate the process without
       * changing signal state.  Mark the accounting record with the signal
       * termination.  If dumping core, save the signal number for the debugger. 
       * Calls exit and does not return.
       */
      void
      sigexit(struct lwp *l, int signo)
      {
              int exitsig, error, docore;
              struct proc *p;
              struct lwp *t;
      
              p = l->l_proc;
      
              KASSERT(mutex_owned(p->p_lock));
              KERNEL_UNLOCK_ALL(l, NULL);
      
              /*
               * Don't permit coredump() multiple times in the same process.
               * Call back into sigexit, where we will be suspended until
               * the deed is done.  Note that this is a recursive call, but
               * LW_WCORE will prevent us from coming back this way.
               */
              if ((p->p_sflag & PS_WCORE) != 0) {
                      lwp_lock(l);
                      l->l_flag |= (LW_WCORE | LW_WEXIT | LW_WSUSPEND);
                      lwp_unlock(l);
                      mutex_exit(p->p_lock);
                      lwp_userret(l);
                      panic("sigexit 1");
                      /* NOTREACHED */
              }
      
              /* If process is already on the way out, then bail now. */
              if ((p->p_sflag & PS_WEXIT) != 0) {
                      mutex_exit(p->p_lock);
                      lwp_exit(l);
                      panic("sigexit 2");
                      /* NOTREACHED */
              }
      
              /*
               * Prepare all other LWPs for exit.  If dumping core, suspend them
               * so that their registers are available long enough to be dumped.
                */
              if ((docore = (sigprop[signo] & SA_CORE)) != 0) {
                      p->p_sflag |= PS_WCORE;
                      for (;;) {
                              LIST_FOREACH(t, &p->p_lwps, l_sibling) {
                                      lwp_lock(t);
                                      if (t == l) {
                                              t->l_flag &= ~LW_WSUSPEND;
                                              lwp_unlock(t);
                                              continue;
                                      }
                                      t->l_flag |= (LW_WCORE | LW_WEXIT);
                                      lwp_suspend(l, t);
                              }
      
                              if (p->p_nrlwps == 1)
                                      break;
      
                              /*
                               * Kick any LWPs sitting in lwp_wait1(), and wait
                               * for everyone else to stop before proceeding.
                               */
                              p->p_nlwpwait++;
                              cv_broadcast(&p->p_lwpcv);
                              cv_wait(&p->p_lwpcv, p->p_lock);
                              p->p_nlwpwait--;
                      }
              }
      
              exitsig = signo;
              p->p_acflag |= AXSIG;
              memset(&p->p_sigctx.ps_info, 0, sizeof(p->p_sigctx.ps_info));
              p->p_sigctx.ps_info._signo = signo;
              p->p_sigctx.ps_info._code = SI_NOINFO;
      
              if (docore) {
                      mutex_exit(p->p_lock);
                      error = (*coredump_vec)(l, NULL);
      
                      if (kern_logsigexit) {
                              int uid = l->l_cred ?
                                  (int)kauth_cred_geteuid(l->l_cred) : -1;
      
                              if (error)
                                      log(LOG_INFO, lognocoredump, p->p_pid,
                                          p->p_comm, uid, signo, error);
                              else
                                      log(LOG_INFO, logcoredump, p->p_pid,
                                          p->p_comm, uid, signo);
                      }
      
      #ifdef PAX_SEGVGUARD
                      pax_segvguard(l, p->p_textvp, p->p_comm, true);
      #endif /* PAX_SEGVGUARD */
                      /* Acquire the sched state mutex.  exit1() will release it. */
                      mutex_enter(p->p_lock);
                      if (error == 0)
                              p->p_sflag |= PS_COREDUMP;
              }
      
              /* No longer dumping core. */
              p->p_sflag &= ~PS_WCORE;
      
              exit1(l, 0, exitsig);
              /* NOTREACHED */
      }
      
      /*
       * Put process 'p' into the stopped state and optionally, notify the parent.
       */
      void
      proc_stop(struct proc *p, int signo)
      {
              struct lwp *l;
      
              KASSERT(mutex_owned(p->p_lock));
      
              /*
               * First off, set the stopping indicator and bring all sleeping
                * LWPs to a halt so they are included in p->p_nrlwps.  We mustn't
               * unlock between here and the p->p_nrlwps check below.
               */
              p->p_sflag |= PS_STOPPING;
              membar_producer();
      
              proc_stop_lwps(p);
      
              /*
               * If there are no LWPs available to take the signal, then we
               * signal the parent process immediately.  Otherwise, the last
               * LWP to stop will take care of it.
               */
      
              if (p->p_nrlwps == 0) {
                      proc_stop_done(p, PS_NOCLDSTOP);
              } else {
                      /*
                       * Have the remaining LWPs come to a halt, and trigger
                       * proc_stop_callout() to ensure that they do.
                       */
                      LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                              sigpost(l, SIG_DFL, SA_STOP, signo);
                      }
                      callout_schedule(&proc_stop_ch, 1);
              }
      }
      
      /*
        * When stopping a process, we do not immediately set sleeping LWPs stopped,
        * but wait for them to come to a halt at the kernel-user boundary.  This is
        * to allow LWPs to release any locks that they may hold before stopping.
        *
        * Non-interruptible sleeps can be long, and there is the potential for an
        * LWP to begin sleeping interruptibly soon after the process has been set
        * stopping (PS_STOPPING).  These LWPs will not notice that the process is
        * stopping, and so complete halt of the process and the return of status
        * information to the parent could be delayed indefinitely.
        *
        * To handle this race, proc_stop_callout() runs once per tick while there
        * are stopping processes in the system.  It sets LWPs that are sleeping
        * interruptibly into the LSSTOP state.
       *
       * Note that we are not concerned about keeping all LWPs stopped while the
       * process is stopped: stopped LWPs can awaken briefly to handle signals. 
       * What we do need to ensure is that all LWPs in a stopping process have
       * stopped at least once, so that notification can be sent to the parent
       * process.
       */
      static void
      proc_stop_callout(void *cookie)
      {
              bool more, restart;
              struct proc *p;
      
              (void)cookie;
      
              do {
                      restart = false;
                      more = false;
      
                      mutex_enter(proc_lock);
                      PROCLIST_FOREACH(p, &allproc) {
                              mutex_enter(p->p_lock);
      
                              if ((p->p_sflag & PS_STOPPING) == 0) {
                                      mutex_exit(p->p_lock);
                                      continue;
                              }
      
                        /* Stop any LWPs sleeping interruptibly. */
                              proc_stop_lwps(p);
                              if (p->p_nrlwps == 0) {
                                      /*
                                       * We brought the process to a halt.
                                       * Mark it as stopped and notify the
                                       * parent.
                                       *
                                       * Note that proc_stop_done() will
                                       * drop p->p_lock briefly.
                                       * Arrange to restart and check
                                       * all processes again.
                                       */
                                      restart = true;
                                      proc_stop_done(p, PS_NOCLDSTOP);
                              } else
                                      more = true;
      
                              mutex_exit(p->p_lock);
                              if (restart)
                                      break;
                      }
                      mutex_exit(proc_lock);
              } while (restart);
      
              /*
               * If we noted processes that are stopping but still have
               * running LWPs, then arrange to check again in 1 tick.
               */
              if (more)
                      callout_schedule(&proc_stop_ch, 1);
      }
      
      /*
       * Given a process in state SSTOP, set the state back to SACTIVE and
       * move LSSTOP'd LWPs to LSSLEEP or make them runnable.
       */
      void
      proc_unstop(struct proc *p)
      {
              struct lwp *l;
              int sig;
      
              KASSERT(mutex_owned(proc_lock));
              KASSERT(mutex_owned(p->p_lock));
      
              p->p_stat = SACTIVE;
              p->p_sflag &= ~PS_STOPPING;
              sig = p->p_xsig;
      
              if (!p->p_waited)
                      p->p_pptr->p_nstopchild--;
      
              LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                      lwp_lock(l);
                      if (l->l_stat != LSSTOP) {
                              lwp_unlock(l);
                              continue;
                      }
                      if (l->l_wchan == NULL) {
                              setrunnable(l);
                              continue;
                      }
                      if (sig && (l->l_flag & LW_SINTR) != 0) {
                              setrunnable(l);
                              sig = 0;
                      } else {
                              l->l_stat = LSSLEEP;
                              p->p_nrlwps++;
                              lwp_unlock(l);
                      }
              }
      }
      
      void
      proc_stoptrace(int trapno, int sysnum, const register_t args[],
                     const register_t *ret, int error)
      {
              struct lwp *l = curlwp;
              struct proc *p = l->l_proc;
              struct sigacts *ps;
              sigset_t *mask;
              sig_t action;
              ksiginfo_t ksi;
              size_t i, sy_narg;
              const int signo = SIGTRAP;
      
              KASSERT((trapno == TRAP_SCE) || (trapno == TRAP_SCX));
              KASSERT(p->p_pptr != initproc);
              KASSERT(ISSET(p->p_slflag, PSL_TRACED));
              KASSERT(ISSET(p->p_slflag, PSL_SYSCALL));
      
              sy_narg = p->p_emul->e_sysent[sysnum].sy_narg;
      
              KSI_INIT_TRAP(&ksi);
              ksi.ksi_lid = l->l_lid;
              ksi.ksi_signo = signo;
              ksi.ksi_code = trapno;
      
              ksi.ksi_sysnum = sysnum;
              if (trapno == TRAP_SCE) {
                      ksi.ksi_retval[0] = 0;
                      ksi.ksi_retval[1] = 0;
                      ksi.ksi_error = 0;
              } else {
                      ksi.ksi_retval[0] = ret[0];
                      ksi.ksi_retval[1] = ret[1];
                      ksi.ksi_error = error;
              }
      
              memset(ksi.ksi_args, 0, sizeof(ksi.ksi_args));
      
              for (i = 0; i < sy_narg; i++)
                      ksi.ksi_args[i] = args[i];
      
              mutex_enter(p->p_lock);
      
              /*
               * If we are exiting, demise now.
               *
                * This avoids notifying the tracer and deadlocking.
               */
              if (__predict_false(ISSET(p->p_sflag, PS_WEXIT))) {
                      mutex_exit(p->p_lock);
                      lwp_exit(l);
                      panic("proc_stoptrace");
                      /* NOTREACHED */
              }
      
              /*
               * If there's a pending SIGKILL process it immediately.
               */
              if (p->p_xsig == SIGKILL ||
                  sigismember(&p->p_sigpend.sp_set, SIGKILL)) {
                      mutex_exit(p->p_lock);
                      return;
              }
      
              /* Needed for ktrace */
              ps = p->p_sigacts;
              action = SIGACTION_PS(ps, signo).sa_handler;
              mask = &l->l_sigmask;
      
              p->p_xsig = signo;
              p->p_sigctx.ps_lwp = ksi.ksi_lid;
              p->p_sigctx.ps_info = ksi.ksi_info;
              sigswitch(0, signo, true);
      
              if (ktrpoint(KTR_PSIG)) {
                      if (p->p_emul->e_ktrpsig)
                              p->p_emul->e_ktrpsig(signo, action, mask, &ksi);
                      else
                              ktrpsig(signo, action, mask, &ksi);
              }
      }
      
      static int
      filt_sigattach(struct knote *kn)
      {
              struct proc *p = curproc;
      
              kn->kn_obj = p;
              kn->kn_flags |= EV_CLEAR;        /* automatically set */
      
              mutex_enter(p->p_lock);
              SLIST_INSERT_HEAD(&p->p_klist, kn, kn_selnext);
              mutex_exit(p->p_lock);
      
              return 0;
      }
      
      static void
      filt_sigdetach(struct knote *kn)
      {
              struct proc *p = kn->kn_obj;
      
              mutex_enter(p->p_lock);
              SLIST_REMOVE(&p->p_klist, kn, knote, kn_selnext);
              mutex_exit(p->p_lock);
      }
      
      /*
       * Signal knotes are shared with proc knotes, so we apply a mask to
       * the hint in order to differentiate them from process hints.  This
       * could be avoided by using a signal-specific knote list, but probably
       * isn't worth the trouble.
       */
      static int
      filt_signal(struct knote *kn, long hint)
      {
      
              if (hint & NOTE_SIGNAL) {
                      hint &= ~NOTE_SIGNAL;
      
                      if (kn->kn_id == hint)
                              kn->kn_data++;
              }
              return (kn->kn_data != 0);
      }
      
      const struct filterops sig_filtops = {
                      .f_isfd = 0,
                      .f_attach = filt_sigattach,
                      .f_detach = filt_sigdetach,
                      .f_event = filt_signal,
      };
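
       /*
        * Illustrative sketch (userland code, not compiled here): sig_filtops
        * backs EVFILT_SIGNAL, where the knote identifier is the signal number
        * and, because of EV_CLEAR, kn_data counts the deliveries noted since
        * the previous kevent() retrieval.  A consumer might look roughly like:
        *
        *        struct kevent kev;
        *        int kq = kqueue();
        *
        *        signal(SIGUSR1, SIG_IGN);        // avoid default termination
        *        EV_SET(&kev, SIGUSR1, EVFILT_SIGNAL, EV_ADD | EV_ENABLE,
        *            0, 0, 0);
        *        kevent(kq, &kev, 1, NULL, 0, NULL);
        *        kevent(kq, NULL, 0, &kev, 1, NULL);
        *        // kev.data now holds the number of SIGUSR1s noted
        */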
      /*        $NetBSD: subr_vmem.c,v 1.97 2018/02/08 09:05:20 dholland Exp $        */
      
      /*-
       * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
       * All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      /*
       * reference:
       * -        Magazines and Vmem: Extending the Slab Allocator
       *        to Many CPUs and Arbitrary Resources
       *        http://www.usenix.org/event/usenix01/bonwick.html
       *
       * locking & the boundary tag pool:
       * -         A pool(9) is used for vmem boundary tags
       * -         During a pool get call the global vmem_btag_refill_lock is taken,
       *        to serialize access to the allocation reserve, but no other
       *        vmem arena locks.
       * -        During pool_put calls no vmem mutexes are locked.
        * -        pool_drain doesn't hold the pool's mutex while releasing memory to
        *        its backing, therefore there is no interference with any vmem mutexes.
        * -        The boundary tag pool is forced to put page headers into pool pages
        *        (PR_PHINPAGE) and not off page, to avoid pool recursion.
        *        (Given sizeof(bt_t), that should be the case anyway.)
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.97 2018/02/08 09:05:20 dholland Exp $");
      
      #if defined(_KERNEL) && defined(_KERNEL_OPT)
      #include "opt_ddb.h"
      #endif /* defined(_KERNEL) && defined(_KERNEL_OPT) */
      
      #include <sys/param.h>
      #include <sys/hash.h>
      #include <sys/queue.h>
      #include <sys/bitops.h>
      
      #if defined(_KERNEL)
      #include <sys/systm.h>
      #include <sys/kernel.h>        /* hz */
      #include <sys/callout.h>
      #include <sys/kmem.h>
      #include <sys/pool.h>
      #include <sys/vmem.h>
      #include <sys/vmem_impl.h>
      #include <sys/workqueue.h>
      #include <sys/atomic.h>
      #include <uvm/uvm.h>
      #include <uvm/uvm_extern.h>
      #include <uvm/uvm_km.h>
      #include <uvm/uvm_page.h>
      #include <uvm/uvm_pdaemon.h>
      #else /* defined(_KERNEL) */
      #include <stdio.h>
      #include <errno.h>
      #include <assert.h>
      #include <stdlib.h>
      #include <string.h>
      #include "../sys/vmem.h"
      #include "../sys/vmem_impl.h"
      #endif /* defined(_KERNEL) */
      
      
      #if defined(_KERNEL)
      #include <sys/evcnt.h>
      #define VMEM_EVCNT_DEFINE(name) \
      struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
          "vmem", #name); \
      EVCNT_ATTACH_STATIC(vmem_evcnt_##name);
      #define VMEM_EVCNT_INCR(ev)        vmem_evcnt_##ev.ev_count++
      #define VMEM_EVCNT_DECR(ev)        vmem_evcnt_##ev.ev_count--
      
      VMEM_EVCNT_DEFINE(static_bt_count)
      VMEM_EVCNT_DEFINE(static_bt_inuse)
      
      #define        VMEM_CONDVAR_INIT(vm, wchan)        cv_init(&vm->vm_cv, wchan)
      #define        VMEM_CONDVAR_DESTROY(vm)        cv_destroy(&vm->vm_cv)
      #define        VMEM_CONDVAR_WAIT(vm)                cv_wait(&vm->vm_cv, &vm->vm_lock)
      #define        VMEM_CONDVAR_BROADCAST(vm)        cv_broadcast(&vm->vm_cv)
      
      #else /* defined(_KERNEL) */
      
      #define VMEM_EVCNT_INCR(ev)        /* nothing */
      #define VMEM_EVCNT_DECR(ev)        /* nothing */
      
      #define        VMEM_CONDVAR_INIT(vm, wchan)        /* nothing */
      #define        VMEM_CONDVAR_DESTROY(vm)        /* nothing */
      #define        VMEM_CONDVAR_WAIT(vm)                /* nothing */
      #define        VMEM_CONDVAR_BROADCAST(vm)        /* nothing */
      
      #define        UNITTEST
      #define        KASSERT(a)                assert(a)
      #define        mutex_init(a, b, c)        /* nothing */
      #define        mutex_destroy(a)        /* nothing */
      #define        mutex_enter(a)                /* nothing */
      #define        mutex_tryenter(a)        true
      #define        mutex_exit(a)                /* nothing */
      #define        mutex_owned(a)                /* nothing */
      #define        ASSERT_SLEEPABLE()        /* nothing */
       #define        panic(...)                do { printf(__VA_ARGS__); abort(); } while (0)
      #endif /* defined(_KERNEL) */
      
      #if defined(VMEM_SANITY)
      static void vmem_check(vmem_t *);
      #else /* defined(VMEM_SANITY) */
      #define vmem_check(vm)        /* nothing */
      #endif /* defined(VMEM_SANITY) */
      
      #define        VMEM_HASHSIZE_MIN        1        /* XXX */
      #define        VMEM_HASHSIZE_MAX        65536        /* XXX */
      #define        VMEM_HASHSIZE_INIT        1
      
      #define        VM_FITMASK        (VM_BESTFIT | VM_INSTANTFIT)
      
      #if defined(_KERNEL)
      static bool vmem_bootstrapped = false;
      static kmutex_t vmem_list_lock;
      static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
      #endif /* defined(_KERNEL) */
      
      /* ---- misc */
      
      #define        VMEM_LOCK(vm)                mutex_enter(&vm->vm_lock)
      #define        VMEM_TRYLOCK(vm)        mutex_tryenter(&vm->vm_lock)
      #define        VMEM_UNLOCK(vm)                mutex_exit(&vm->vm_lock)
      #define        VMEM_LOCK_INIT(vm, ipl)        mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
      #define        VMEM_LOCK_DESTROY(vm)        mutex_destroy(&vm->vm_lock)
      #define        VMEM_ASSERT_LOCKED(vm)        KASSERT(mutex_owned(&vm->vm_lock))
      
      #define        VMEM_ALIGNUP(addr, align) \
              (-(-(addr) & -(align)))
      
      #define        VMEM_CROSS_P(addr1, addr2, boundary) \
              ((((addr1) ^ (addr2)) & -(boundary)) != 0)
      
      #define        ORDER2SIZE(order)        ((vmem_size_t)1 << (order))
      #define        SIZE2ORDER(size)        ((int)ilog2(size))
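
       /*
        * Worked examples (illustrative only; "align" and "boundary" are
        * assumed to be powers of two, as elsewhere in this file):
        *
        *        VMEM_ALIGNUP(0x123, 0x100) == 0x200        (rounded up)
        *        VMEM_ALIGNUP(0x200, 0x100) == 0x200        (already aligned)
        *        VMEM_CROSS_P(0x1ff, 0x200, 0x100) != 0     (crosses a 0x100 boundary)
        *        ORDER2SIZE(3) == 8, SIZE2ORDER(8) == 3
        */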
      
      #if !defined(_KERNEL)
      #define        xmalloc(sz, flags)        malloc(sz)
      #define        xfree(p, sz)                free(p)
      #define        bt_alloc(vm, flags)        malloc(sizeof(bt_t))
      #define        bt_free(vm, bt)                free(bt)
      #else /* defined(_KERNEL) */
      
       #define        xmalloc(sz, flags) \
           kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP)
       #define        xfree(p, sz)                kmem_free(p, sz)
      
      /*
       * BT_RESERVE calculation:
        * we allocate memory for boundary tags with vmem; therefore we have
        * to keep a reserve of bts used to allocate memory for bts.
        * This reserve is 4 for each arena involved in allocating vmem's memory.
        * BT_MAXFREE: don't cache excessive counts of bts in arenas.
       */
      #define STATIC_BT_COUNT 200
      #define BT_MINRESERVE 4
      #define BT_MAXFREE 64
      
      static struct vmem_btag static_bts[STATIC_BT_COUNT];
      static int static_bt_count = STATIC_BT_COUNT;
      
      static struct vmem kmem_va_meta_arena_store;
      vmem_t *kmem_va_meta_arena;
      static struct vmem kmem_meta_arena_store;
      vmem_t *kmem_meta_arena = NULL;
      
      static kmutex_t vmem_btag_refill_lock;
      static kmutex_t vmem_btag_lock;
      static LIST_HEAD(, vmem_btag) vmem_btag_freelist;
      static size_t vmem_btag_freelist_count = 0;
      static struct pool vmem_btag_pool;
      
      static void
      vmem_kick_pdaemon(void)
      {
      #if defined(_KERNEL)
              mutex_spin_enter(&uvm_fpageqlock);
              uvm_kick_pdaemon();
              mutex_spin_exit(&uvm_fpageqlock);
      #endif
      }
      
      /* ---- boundary tag */
      
      static int bt_refill(vmem_t *vm);
      
      static void *
      pool_page_alloc_vmem_meta(struct pool *pp, int flags)
      {
              const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP;
              vmem_addr_t va;
              int ret;
      
              ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz,
                  (vflags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va);
      
              return ret ? NULL : (void *)va;
      }
      
      static void
      pool_page_free_vmem_meta(struct pool *pp, void *v)
      {
      
              vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz);
      }
      
      /* allocator for vmem-pool metadata */
      struct pool_allocator pool_allocator_vmem_meta = {
              .pa_alloc = pool_page_alloc_vmem_meta,
              .pa_free = pool_page_free_vmem_meta,
              .pa_pagesz = 0
      };
      
      static int
      bt_refill(vmem_t *vm)
      {
              bt_t *bt;
      
              VMEM_LOCK(vm);
              if (vm->vm_nfreetags > BT_MINRESERVE) {
                      VMEM_UNLOCK(vm);
                      return 0;
              }
      
              mutex_enter(&vmem_btag_lock);
              while (!LIST_EMPTY(&vmem_btag_freelist) &&
                  vm->vm_nfreetags <= BT_MINRESERVE) {
                      bt = LIST_FIRST(&vmem_btag_freelist);
                      LIST_REMOVE(bt, bt_freelist);
                      LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
                      vm->vm_nfreetags++;
                      vmem_btag_freelist_count--;
                      VMEM_EVCNT_INCR(static_bt_inuse);
              }
              mutex_exit(&vmem_btag_lock);
      
              while (vm->vm_nfreetags <= BT_MINRESERVE) {
                      VMEM_UNLOCK(vm);
                      mutex_enter(&vmem_btag_refill_lock);
                      bt = pool_get(&vmem_btag_pool, PR_NOWAIT);
                      mutex_exit(&vmem_btag_refill_lock);
                      VMEM_LOCK(vm);
                      if (bt == NULL)
                              break;
                      LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
                      vm->vm_nfreetags++;
              }
      
              if (vm->vm_nfreetags <= BT_MINRESERVE) {
                      VMEM_UNLOCK(vm);
                      return ENOMEM;
              }
      
              VMEM_UNLOCK(vm);
      
              if (kmem_meta_arena != NULL) {
                      (void)bt_refill(kmem_arena);
                      (void)bt_refill(kmem_va_meta_arena);
                      (void)bt_refill(kmem_meta_arena);
              }
      
              return 0;
      }
      
      static bt_t *
      bt_alloc(vmem_t *vm, vm_flag_t flags)
      {
              bt_t *bt;
              VMEM_LOCK(vm);
              while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) {
                      VMEM_UNLOCK(vm);
                      if (bt_refill(vm)) {
                              if ((flags & VM_NOSLEEP) != 0) {
                                      return NULL;
                              }
      
                              /*
                               * It would be nice to wait for something specific here
                               * but there are multiple ways that a retry could
                               * succeed and we can't wait for multiple things
                               * simultaneously.  So we'll just sleep for an arbitrary
                               * short period of time and retry regardless.
                               * This should be a very rare case.
                               */
      
                              vmem_kick_pdaemon();
                              kpause("btalloc", false, 1, NULL);
                      }
                      VMEM_LOCK(vm);
              }
              bt = LIST_FIRST(&vm->vm_freetags);
              LIST_REMOVE(bt, bt_freelist);
              vm->vm_nfreetags--;
              VMEM_UNLOCK(vm);
      
              return bt;
      }
      
      static void
      bt_free(vmem_t *vm, bt_t *bt)
      {
      
              VMEM_LOCK(vm);
              LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
              vm->vm_nfreetags++;
              VMEM_UNLOCK(vm);
      }
      
      static void
      bt_freetrim(vmem_t *vm, int freelimit)
      {
              bt_t *t;
              LIST_HEAD(, vmem_btag) tofree;
      
              LIST_INIT(&tofree);
      
              VMEM_LOCK(vm);
              while (vm->vm_nfreetags > freelimit) {
                      bt_t *bt = LIST_FIRST(&vm->vm_freetags);
                      LIST_REMOVE(bt, bt_freelist);
                      vm->vm_nfreetags--;
                      if (bt >= static_bts
                          && bt < &static_bts[STATIC_BT_COUNT]) {
                              mutex_enter(&vmem_btag_lock);
                              LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
                              vmem_btag_freelist_count++;
                              mutex_exit(&vmem_btag_lock);
                              VMEM_EVCNT_DECR(static_bt_inuse);
                      } else {
                              LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
                      }
              }
      
              VMEM_UNLOCK(vm);
              while (!LIST_EMPTY(&tofree)) {
                      t = LIST_FIRST(&tofree);
                      LIST_REMOVE(t, bt_freelist);
                      pool_put(&vmem_btag_pool, t);
              }
      }
      #endif        /* defined(_KERNEL) */
      
      /*
       * freelist[0] ... [1, 1]
       * freelist[1] ... [2, 3]
       * freelist[2] ... [4, 7]
       * freelist[3] ... [8, 15]
       *  :
       * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
       *  :
       */
      
      static struct vmem_freelist *
      bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
      {
              const vmem_size_t qsize = size >> vm->vm_quantum_shift;
              const int idx = SIZE2ORDER(qsize);
      
              KASSERT(size != 0 && qsize != 0);
              KASSERT((size & vm->vm_quantum_mask) == 0);
              KASSERT(idx >= 0);
              KASSERT(idx < VMEM_MAXORDER);
      
              return &vm->vm_freelist[idx];
      }
      
      /*
       * bt_freehead_toalloc: return the freelist for the given size and allocation
       * strategy.
       *
       * for VM_INSTANTFIT, return the list in which any blocks are large enough
       * for the requested size.  otherwise, return the list which can have blocks
       * large enough for the requested size.
       */
      
      static struct vmem_freelist *
      bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
      {
              const vmem_size_t qsize = size >> vm->vm_quantum_shift;
              int idx = SIZE2ORDER(qsize);
      
              KASSERT(size != 0 && qsize != 0);
              KASSERT((size & vm->vm_quantum_mask) == 0);
      
              if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
                      idx++;
                      /* check too large request? */
              }
              KASSERT(idx >= 0);
              KASSERT(idx < VMEM_MAXORDER);
      
              return &vm->vm_freelist[idx];
      }
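
       /*
        * Example (illustrative): for a request of 6 quanta, SIZE2ORDER(6) == 2.
        * VM_BESTFIT starts at freelist[2] (blocks of [4, 7] quanta), which may
        * contain blocks smaller than 6 and so must be searched; VM_INSTANTFIT
        * bumps the index to freelist[3] ([8, 15]), where any block is large
        * enough to satisfy the request.
        */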
      
      /* ---- boundary tag hash */
      
      static struct vmem_hashlist *
      bt_hashhead(vmem_t *vm, vmem_addr_t addr)
      {
              struct vmem_hashlist *list;
              unsigned int hash;
      
              hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
              list = &vm->vm_hashlist[hash % vm->vm_hashsize];
      
              return list;
      }
      
      static bt_t *
      bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
      {
              struct vmem_hashlist *list;
              bt_t *bt;
      
              list = bt_hashhead(vm, addr);
              LIST_FOREACH(bt, list, bt_hashlist) {
                      if (bt->bt_start == addr) {
                              break;
                      }
              }
      
              return bt;
      }
      
      static void
      bt_rembusy(vmem_t *vm, bt_t *bt)
      {
      
              KASSERT(vm->vm_nbusytag > 0);
              vm->vm_inuse -= bt->bt_size;
              vm->vm_nbusytag--;
              LIST_REMOVE(bt, bt_hashlist);
      }
      
      static void
      bt_insbusy(vmem_t *vm, bt_t *bt)
      {
              struct vmem_hashlist *list;
      
              KASSERT(bt->bt_type == BT_TYPE_BUSY);
      
              list = bt_hashhead(vm, bt->bt_start);
              LIST_INSERT_HEAD(list, bt, bt_hashlist);
              vm->vm_nbusytag++;
              vm->vm_inuse += bt->bt_size;
      }
      
      /* ---- boundary tag list */
      
      static void
      bt_remseg(vmem_t *vm, bt_t *bt)
      {
      
              TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
      }
      
      static void
      bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
      {
      
              TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
      }
      
      static void
      bt_insseg_tail(vmem_t *vm, bt_t *bt)
      {
      
              TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
      }
      
      static void
      bt_remfree(vmem_t *vm, bt_t *bt)
      {
      
              KASSERT(bt->bt_type == BT_TYPE_FREE);
      
              LIST_REMOVE(bt, bt_freelist);
      }
      
      static void
      bt_insfree(vmem_t *vm, bt_t *bt)
      {
              struct vmem_freelist *list;
      
              list = bt_freehead_tofree(vm, bt->bt_size);
              LIST_INSERT_HEAD(list, bt, bt_freelist);
      }
      
      /* ---- vmem internal functions */
      
      #if defined(QCACHE)
      static inline vm_flag_t
      prf_to_vmf(int prflags)
      {
              vm_flag_t vmflags;
      
              KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
              if ((prflags & PR_WAITOK) != 0) {
                      vmflags = VM_SLEEP;
              } else {
                      vmflags = VM_NOSLEEP;
              }
              return vmflags;
      }
      
      static inline int
      vmf_to_prf(vm_flag_t vmflags)
      {
              int prflags;
      
              if ((vmflags & VM_SLEEP) != 0) {
                      prflags = PR_WAITOK;
              } else {
                      prflags = PR_NOWAIT;
              }
              return prflags;
      }
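
       /*
        * qc_poolpage_size: pick the pool page size for the quantum caches as
        * the smallest power of two strictly greater than 3 * qcache_max,
        * presumably so that each pool page can hold at least a few objects of
        * the largest cached size.
        */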
      
      static size_t
      qc_poolpage_size(size_t qcache_max)
      {
              int i;
      
              for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
                      /* nothing */
              }
              return ORDER2SIZE(i);
      }
      
      static void *
      qc_poolpage_alloc(struct pool *pool, int prflags)
      {
              qcache_t *qc = QC_POOL_TO_QCACHE(pool);
              vmem_t *vm = qc->qc_vmem;
              vmem_addr_t addr;
      
              if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
                  prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0)
                      return NULL;
              return (void *)addr;
      }
      
      static void
      qc_poolpage_free(struct pool *pool, void *addr)
      {
              qcache_t *qc = QC_POOL_TO_QCACHE(pool);
              vmem_t *vm = qc->qc_vmem;
      
              vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
      }
      
      static void
      qc_init(vmem_t *vm, size_t qcache_max, int ipl)
      {
              qcache_t *prevqc;
              struct pool_allocator *pa;
              int qcache_idx_max;
              int i;
      
              KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
              if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
                      qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
              }
              vm->vm_qcache_max = qcache_max;
              pa = &vm->vm_qcache_allocator;
              memset(pa, 0, sizeof(*pa));
              pa->pa_alloc = qc_poolpage_alloc;
              pa->pa_free = qc_poolpage_free;
              pa->pa_pagesz = qc_poolpage_size(qcache_max);
      
              qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
              prevqc = NULL;
              for (i = qcache_idx_max; i > 0; i--) {
                      qcache_t *qc = &vm->vm_qcache_store[i - 1];
                      size_t size = i << vm->vm_quantum_shift;
                      pool_cache_t pc;
      
                      qc->qc_vmem = vm;
                      snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
                          vm->vm_name, size);
      
                      pc = pool_cache_init(size,
                          ORDER2SIZE(vm->vm_quantum_shift), 0,
                          PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */,
                          qc->qc_name, pa, ipl, NULL, NULL, NULL);
      
                      KASSERT(pc);
      
                      qc->qc_cache = pc;
                      KASSERT(qc->qc_cache != NULL);        /* XXX */
                      if (prevqc != NULL &&
                          qc->qc_cache->pc_pool.pr_itemsperpage ==
                          prevqc->qc_cache->pc_pool.pr_itemsperpage) {
                              pool_cache_destroy(qc->qc_cache);
                              vm->vm_qcache[i - 1] = prevqc;
                              continue;
                      }
                      qc->qc_cache->pc_pool.pr_qcache = qc;
                      vm->vm_qcache[i - 1] = qc;
                      prevqc = qc;
              }
      }
      
      static void
      qc_destroy(vmem_t *vm)
      {
              const qcache_t *prevqc;
              int i;
              int qcache_idx_max;
      
              qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
              prevqc = NULL;
              for (i = 0; i < qcache_idx_max; i++) {
                      qcache_t *qc = vm->vm_qcache[i];
      
                      if (prevqc == qc) {
                              continue;
                      }
                      pool_cache_destroy(qc->qc_cache);
                      prevqc = qc;
              }
      }
      #endif
      
      #if defined(_KERNEL)
      static void
      vmem_bootstrap(void)
      {
      
              mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM);
              mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM);
              mutex_init(&vmem_btag_refill_lock, MUTEX_DEFAULT, IPL_VM);
      
              while (static_bt_count-- > 0) {
                      bt_t *bt = &static_bts[static_bt_count];
                      LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
                      VMEM_EVCNT_INCR(static_bt_count);
                      vmem_btag_freelist_count++;
              }
              vmem_bootstrapped = TRUE;
      }
      
      void
      vmem_subsystem_init(vmem_t *vm)
      {
      
              kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va",
                  0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm,
                  0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT,
                  IPL_VM);
      
              kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta",
                  0, 0, PAGE_SIZE,
                  uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena,
                  0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
      
              pool_init(&vmem_btag_pool, sizeof(bt_t), 0, 0, PR_PHINPAGE,
                          "vmembt", &pool_allocator_vmem_meta, IPL_VM);
      }
      #endif /* defined(_KERNEL) */
      
      static int
      vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
          int spanbttype)
      {
              bt_t *btspan;
              bt_t *btfree;
      
              KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
              KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
              KASSERT(spanbttype == BT_TYPE_SPAN ||
                  spanbttype == BT_TYPE_SPAN_STATIC);
      
              btspan = bt_alloc(vm, flags);
              if (btspan == NULL) {
                      return ENOMEM;
              }
              btfree = bt_alloc(vm, flags);
              if (btfree == NULL) {
                      bt_free(vm, btspan);
                      return ENOMEM;
              }
      
              btspan->bt_type = spanbttype;
              btspan->bt_start = addr;
              btspan->bt_size = size;
      
              btfree->bt_type = BT_TYPE_FREE;
              btfree->bt_start = addr;
              btfree->bt_size = size;
      
              VMEM_LOCK(vm);
              bt_insseg_tail(vm, btspan);
              bt_insseg(vm, btfree, btspan);
              bt_insfree(vm, btfree);
              vm->vm_size += size;
              VMEM_UNLOCK(vm);
      
              return 0;
      }
      
      static void
      vmem_destroy1(vmem_t *vm)
      {
      
      #if defined(QCACHE)
              qc_destroy(vm);
      #endif /* defined(QCACHE) */
              if (vm->vm_hashlist != NULL) {
                      int i;
      
                      for (i = 0; i < vm->vm_hashsize; i++) {
                              bt_t *bt;
      
                              while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
                                      KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
                                      bt_free(vm, bt);
                              }
                      }
                      if (vm->vm_hashlist != &vm->vm_hash0) {
                              xfree(vm->vm_hashlist,
                                  sizeof(struct vmem_hashlist *) * vm->vm_hashsize);
                      }
              }
      
              bt_freetrim(vm, 0);
      
              VMEM_CONDVAR_DESTROY(vm);
              VMEM_LOCK_DESTROY(vm);
              xfree(vm, sizeof(*vm));
      }
      
      static int
      vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
      {
              vmem_addr_t addr;
              int rc;
      
              if (vm->vm_importfn == NULL) {
                      return EINVAL;
              }
      
              if (vm->vm_flags & VM_LARGEIMPORT) {
                      size *= 16;
              }
      
              if (vm->vm_flags & VM_XIMPORT) {
                      rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size,
                          &size, flags, &addr);
              } else {
                      rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
              }
              if (rc) {
                      return ENOMEM;
              }
      
              if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) {
                      (*vm->vm_releasefn)(vm->vm_arg, addr, size);
                      return ENOMEM;
              }
      
              return 0;
      }
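
/*
 * Illustrative sketch (not part of this file): an import/release pair
 * matching the calling convention used by vmem_import() above.  The
 * names example_import(), example_release() and "source" are made up;
 * in practice vmem_alloc()/vmem_free() themselves already have this
 * signature and are used directly, e.g. in vmem_subsystem_init().
 *
 *	static int
 *	example_import(vmem_t *source, vmem_size_t size, vm_flag_t flags,
 *	    vmem_addr_t *addrp)
 *	{
 *
 *		return vmem_alloc(source, size, flags, addrp);
 *	}
 *
 *	static void
 *	example_release(vmem_t *source, vmem_addr_t addr, vmem_size_t size)
 *	{
 *
 *		vmem_free(source, addr, size);
 *	}
 */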
      
      static int
      vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
      {
              bt_t *bt;
              int i;
              struct vmem_hashlist *newhashlist;
              struct vmem_hashlist *oldhashlist;
              size_t oldhashsize;
      
              KASSERT(newhashsize > 0);
      
              newhashlist =
                  xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
              if (newhashlist == NULL) {
                      return ENOMEM;
              }
              for (i = 0; i < newhashsize; i++) {
                      LIST_INIT(&newhashlist[i]);
              }
      
              if (!VMEM_TRYLOCK(vm)) {
                      xfree(newhashlist,
                          sizeof(struct vmem_hashlist *) * newhashsize);
                      return EBUSY;
              }
              oldhashlist = vm->vm_hashlist;
              oldhashsize = vm->vm_hashsize;
              vm->vm_hashlist = newhashlist;
              vm->vm_hashsize = newhashsize;
              if (oldhashlist == NULL) {
                      VMEM_UNLOCK(vm);
                      return 0;
              }
              for (i = 0; i < oldhashsize; i++) {
                      while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
                              bt_rembusy(vm, bt); /* XXX */
                              bt_insbusy(vm, bt);
                      }
              }
              VMEM_UNLOCK(vm);
      
              if (oldhashlist != &vm->vm_hash0) {
                      xfree(oldhashlist,
                          sizeof(struct vmem_hashlist *) * oldhashsize);
              }
      
              return 0;
      }
      
      /*
       * vmem_fit: check if a bt can satisfy the given restrictions.
       *
 * it's the caller's responsibility to ensure the region is big enough
       * before calling us.
       */
      
      static int
      vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
          vmem_size_t phase, vmem_size_t nocross,
          vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp)
      {
              vmem_addr_t start;
              vmem_addr_t end;
      
              KASSERT(size > 0);
              KASSERT(bt->bt_size >= size); /* caller's responsibility */
      
              /*
               * XXX assumption: vmem_addr_t and vmem_size_t are
 * unsigned integers of the same size.
               */
      
              start = bt->bt_start;
              if (start < minaddr) {
                      start = minaddr;
              }
              end = BT_END(bt);
              if (end > maxaddr) {
                      end = maxaddr;
              }
              if (start > end) {
                      return ENOMEM;
              }
      
              start = VMEM_ALIGNUP(start - phase, align) + phase;
              if (start < bt->bt_start) {
                      start += align;
              }
              if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
                      KASSERT(align < nocross);
                      start = VMEM_ALIGNUP(start - phase, nocross) + phase;
              }
              if (start <= end && end - start >= size - 1) {
                      KASSERT((start & (align - 1)) == phase);
                      KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
                      KASSERT(minaddr <= start);
                      KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr);
                      KASSERT(bt->bt_start <= start);
                      KASSERT(BT_END(bt) - start >= size - 1);
                      *addrp = start;
                      return 0;
              }
              return ENOMEM;
      }
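
/*
 * Worked example (illustrative numbers): given a free tag with
 * bt_start = 0x1050 and BT_END(bt) = 0x1fff, a request of size 0x100
 * with align = 0x200, phase = 0x80 and no nocross/minaddr/maxaddr
 * restrictions computes
 *
 *	start = VMEM_ALIGNUP(0x1050 - 0x80, 0x200) + 0x80 = 0x1080,
 *
 * which lies within the tag and leaves room for 0x100 bytes, so the
 * allocation is placed at 0x1080 (note 0x1080 & (0x200 - 1) == 0x80,
 * i.e. the requested phase).
 */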
      
      /* ---- vmem API */
      
      /*
 * vmem_init: creates a vmem arena; used by vmem_create() and vmem_xcreate().
       */
      
      vmem_t *
      vmem_init(vmem_t *vm, const char *name,
          vmem_addr_t base, vmem_size_t size, vmem_size_t quantum,
          vmem_import_t *importfn, vmem_release_t *releasefn,
          vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
      {
              int i;
      
              KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
              KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
              KASSERT(quantum > 0);
      
      #if defined(_KERNEL)
              /* XXX: SMP, we get called early... */
              if (!vmem_bootstrapped) {
                      vmem_bootstrap();
              }
      #endif /* defined(_KERNEL) */
      
              if (vm == NULL) {
                      vm = xmalloc(sizeof(*vm), flags);
              }
              if (vm == NULL) {
                      return NULL;
              }
      
              VMEM_CONDVAR_INIT(vm, "vmem");
              VMEM_LOCK_INIT(vm, ipl);
              vm->vm_flags = flags;
              vm->vm_nfreetags = 0;
              LIST_INIT(&vm->vm_freetags);
              strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
              vm->vm_quantum_mask = quantum - 1;
              vm->vm_quantum_shift = SIZE2ORDER(quantum);
              KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
              vm->vm_importfn = importfn;
              vm->vm_releasefn = releasefn;
              vm->vm_arg = arg;
              vm->vm_nbusytag = 0;
              vm->vm_size = 0;
              vm->vm_inuse = 0;
      #if defined(QCACHE)
              qc_init(vm, qcache_max, ipl);
      #endif /* defined(QCACHE) */
      
              TAILQ_INIT(&vm->vm_seglist);
              for (i = 0; i < VMEM_MAXORDER; i++) {
                      LIST_INIT(&vm->vm_freelist[i]);
              }
              memset(&vm->vm_hash0, 0, sizeof(struct vmem_hashlist));
              vm->vm_hashsize = 1;
              vm->vm_hashlist = &vm->vm_hash0;
      
              if (size != 0) {
                      if (vmem_add(vm, base, size, flags) != 0) {
                              vmem_destroy1(vm);
                              return NULL;
                      }
              }
      
      #if defined(_KERNEL)
              if (flags & VM_BOOTSTRAP) {
                      bt_refill(vm);
              }
      
              mutex_enter(&vmem_list_lock);
              LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
              mutex_exit(&vmem_list_lock);
      #endif /* defined(_KERNEL) */
      
              return vm;
}

      /*
       * vmem_create: create an arena.
       *
       * => must not be called from interrupt context.
       */
      
      vmem_t *
      vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
          vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
          vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
      {
      
              KASSERT((flags & (VM_XIMPORT)) == 0);
      
              return vmem_init(NULL, name, base, size, quantum,
                  importfn, releasefn, source, qcache_max, flags, ipl);
      }
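
/*
 * Usage sketch (illustrative values only; "arena" and the constants are
 * made up):
 *
 *	vmem_t *arena;
 *	vmem_addr_t va;
 *
 *	arena = vmem_create("example", 0x1000, 0x10000, PAGE_SIZE,
 *	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
 *	if (vmem_alloc(arena, 3 * PAGE_SIZE, VM_INSTANTFIT | VM_SLEEP,
 *	    &va) == 0) {
 *		... use [va, va + 3 * PAGE_SIZE) ...
 *		vmem_free(arena, va, 3 * PAGE_SIZE);
 *	}
 *	vmem_destroy(arena);
 */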
      
      /*
 * vmem_xcreate: create an arena that takes an alternative import function.
       *
       * => must not be called from interrupt context.
       */
      
      vmem_t *
      vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size,
          vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn,
          vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
      {
      
              KASSERT((flags & (VM_XIMPORT)) == 0);
      
              return vmem_init(NULL, name, base, size, quantum,
                  (vmem_import_t *)importfn, releasefn, source,
                  qcache_max, flags | VM_XIMPORT, ipl);
      }
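
/*
 * Illustrative sketch: unlike a vmem_import_t, a vmem_ximport_t may
 * import more than was asked for and reports the actual size back
 * through a pointer (see the VM_XIMPORT branch in vmem_import()).
 * example_ximport() and EXAMPLE_CHUNK are made-up names.
 *
 *	static int
 *	example_ximport(vmem_t *source, vmem_size_t size,
 *	    vmem_size_t *actualp, vm_flag_t flags, vmem_addr_t *addrp)
 *	{
 *		vmem_size_t sz = MAX(size, EXAMPLE_CHUNK);
 *
 *		*actualp = sz;
 *		return vmem_alloc(source, sz, flags, addrp);
 *	}
 */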
      
      void
      vmem_destroy(vmem_t *vm)
      {
      
      #if defined(_KERNEL)
              mutex_enter(&vmem_list_lock);
              LIST_REMOVE(vm, vm_alllist);
              mutex_exit(&vmem_list_lock);
      #endif /* defined(_KERNEL) */
      
              vmem_destroy1(vm);
      }
      
      vmem_size_t
      vmem_roundup_size(vmem_t *vm, vmem_size_t size)
      {
      
              return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
      }
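
/*
 * e.g. with quantum = 4096 (mask = 0xfff), a request of 5000 bytes is
 * rounded up to (5000 + 0xfff) & ~0xfff = 8192.
 */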
      
      /*
       * vmem_alloc: allocate resource from the arena.
       */
      
      int
      vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp)
      {
              const vm_flag_t strat __diagused = flags & VM_FITMASK;
              int error;
      
              KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
              KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
      
              KASSERT(size > 0);
              KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
              if ((flags & VM_SLEEP) != 0) {
                      ASSERT_SLEEPABLE();
              }
      
      #if defined(QCACHE)
              if (size <= vm->vm_qcache_max) {
                      void *p;
                      int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
                      qcache_t *qc = vm->vm_qcache[qidx - 1];
      
                      p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags));
                      if (addrp != NULL)
                              *addrp = (vmem_addr_t)p;
                      error = (p == NULL) ? ENOMEM : 0;
                      goto out;
              }
      #endif /* defined(QCACHE) */
      
              error = vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
                  flags, addrp);
      out:
              KASSERT(error == 0 || (flags & VM_SLEEP) == 0);
              return error;
      }
      
      int
      vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
          const vmem_size_t phase, const vmem_size_t nocross,
          const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags,
          vmem_addr_t *addrp)
      {
              struct vmem_freelist *list;
              struct vmem_freelist *first;
              struct vmem_freelist *end;
              bt_t *bt;
              bt_t *btnew;
              bt_t *btnew2;
              const vmem_size_t size = vmem_roundup_size(vm, size0);
              vm_flag_t strat = flags & VM_FITMASK;
              vmem_addr_t start;
              int rc;
      
              KASSERT(size0 > 0);
              KASSERT(size > 0);
              KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
              if ((flags & VM_SLEEP) != 0) {
                      ASSERT_SLEEPABLE();
              }
              KASSERT((align & vm->vm_quantum_mask) == 0);
              KASSERT((align & (align - 1)) == 0);
              KASSERT((phase & vm->vm_quantum_mask) == 0);
              KASSERT((nocross & vm->vm_quantum_mask) == 0);
              KASSERT((nocross & (nocross - 1)) == 0);
              KASSERT((align == 0 && phase == 0) || phase < align);
              KASSERT(nocross == 0 || nocross >= size);
              KASSERT(minaddr <= maxaddr);
              KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));
      
              if (align == 0) {
                      align = vm->vm_quantum_mask + 1;
              }
      
              /*
               * allocate boundary tags before acquiring the vmem lock.
               */
              btnew = bt_alloc(vm, flags);
              if (btnew == NULL) {
                      return ENOMEM;
              }
              btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
              if (btnew2 == NULL) {
                      bt_free(vm, btnew);
                      return ENOMEM;
              }
      
              /*
               * choose a free block from which we allocate.
               */
      retry_strat:
              first = bt_freehead_toalloc(vm, size, strat);
              end = &vm->vm_freelist[VMEM_MAXORDER];
      retry:
              bt = NULL;
              VMEM_LOCK(vm);
              vmem_check(vm);
              if (strat == VM_INSTANTFIT) {
                      /*
                       * just choose the first block which satisfies our restrictions.
                       *
                       * note that we don't need to check the size of the blocks
		 * because any blocks found on these lists should be larger than
                       * the given size.
                       */
                      for (list = first; list < end; list++) {
                              bt = LIST_FIRST(list);
                              if (bt != NULL) {
                                      rc = vmem_fit(bt, size, align, phase,
                                          nocross, minaddr, maxaddr, &start);
                                      if (rc == 0) {
                                              goto gotit;
                                      }
                                      /*
                                       * don't bother to follow the bt_freelist link
                                       * here.  the list can be very long and we are
                                       * told to run fast.  blocks from the later free
                                       * lists are larger and have better chances to
                                       * satisfy our restrictions.
                                       */
                              }
                      }
              } else { /* VM_BESTFIT */
                      /*
                       * we assume that, for space efficiency, it's better to
                       * allocate from a smaller block.  thus we will start searching
		 * from a lower-order list than VM_INSTANTFIT does.
                       * however, don't bother to find the smallest block in a free
                       * list because the list can be very long.  we can revisit it
                       * if/when it turns out to be a problem.
                       *
                       * note that the 'first' list can contain blocks smaller than
                       * the requested size.  thus we need to check bt_size.
                       */
                      for (list = first; list < end; list++) {
                              LIST_FOREACH(bt, list, bt_freelist) {
                                      if (bt->bt_size >= size) {
                                              rc = vmem_fit(bt, size, align, phase,
                                                  nocross, minaddr, maxaddr, &start);
                                              if (rc == 0) {
                                                      goto gotit;
                                              }
                                      }
                              }
                      }
              }
              VMEM_UNLOCK(vm);
      #if 1
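	/*
	 * instant fit found no suitable block; fall back to best fit,
	 * which also examines blocks on the lower-order free lists.
	 */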
              if (strat == VM_INSTANTFIT) {
                      strat = VM_BESTFIT;
                      goto retry_strat;
              }
      #endif
              if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) {
      
                      /*
                       * XXX should try to import a region large enough to
                       * satisfy restrictions?
                       */
      
                      goto fail;
              }
              /* XXX eeek, minaddr & maxaddr not respected */
              if (vmem_import(vm, size, flags) == 0) {
                      goto retry;
              }
              /* XXX */
      
              if ((flags & VM_SLEEP) != 0) {
                      vmem_kick_pdaemon();
                      VMEM_LOCK(vm);
                      VMEM_CONDVAR_WAIT(vm);
                      VMEM_UNLOCK(vm);
                      goto retry;
              }
      fail:
              bt_free(vm, btnew);
              bt_free(vm, btnew2);
              return ENOMEM;
      
      gotit:
              KASSERT(bt->bt_type == BT_TYPE_FREE);
              KASSERT(bt->bt_size >= size);
              bt_remfree(vm, bt);
              vmem_check(vm);
              if (bt->bt_start != start) {
                      btnew2->bt_type = BT_TYPE_FREE;
                      btnew2->bt_start = bt->bt_start;
                      btnew2->bt_size = start - bt->bt_start;
                      bt->bt_start = start;
                      bt->bt_size -= btnew2->bt_size;
                      bt_insfree(vm, btnew2);
                      bt_insseg(vm, btnew2, TAILQ_PREV(bt, vmem_seglist, bt_seglist));
                      btnew2 = NULL;
                      vmem_check(vm);
              }
              KASSERT(bt->bt_start == start);
              if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
                      /* split */
                      btnew->bt_type = BT_TYPE_BUSY;
                      btnew->bt_start = bt->bt_start;
                      btnew->bt_size = size;
                      bt->bt_start = bt->bt_start + size;
                      bt->bt_size -= size;
                      bt_insfree(vm, bt);
                      bt_insseg(vm, btnew, TAILQ_PREV(bt, vmem_seglist, bt_seglist));
                      bt_insbusy(vm, btnew);
                      vmem_check(vm);
                      VMEM_UNLOCK(vm);
              } else {
                      bt->bt_type = BT_TYPE_BUSY;
                      bt_insbusy(vm, bt);
                      vmem_check(vm);
                      VMEM_UNLOCK(vm);
                      bt_free(vm, btnew);
                      btnew = bt;
              }
              if (btnew2 != NULL) {
                      bt_free(vm, btnew2);
              }
              KASSERT(btnew->bt_size >= size);
              btnew->bt_type = BT_TYPE_BUSY;
      
              if (addrp != NULL)
                      *addrp = btnew->bt_start;
              return 0;
      }
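
/*
 * Example of a constrained allocation (illustrative values; "arena" is
 * a hypothetical vmem_t * with a quantum of PAGE_SIZE or smaller):
 * 0x2000 bytes, aligned to 0x1000, not crossing a 0x10000 boundary and
 * taken from the low 16MB:
 *
 *	vmem_addr_t va;
 *	int error = vmem_xalloc(arena, 0x2000, 0x1000, 0, 0x10000,
 *	    0, 0x00ffffff, VM_BESTFIT | VM_SLEEP, &va);
 *
 * Such allocations are released with vmem_xfree() rather than
 * vmem_free(), since vmem_free() may hand small sizes to the quantum
 * cache.
 */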
      
      /*
       * vmem_free: free the resource to the arena.
       */
      
      void
      vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
      {
      
              KASSERT(size > 0);
      
      #if defined(QCACHE)
              if (size <= vm->vm_qcache_max) {
                      int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
                      qcache_t *qc = vm->vm_qcache[qidx - 1];
      
                      pool_cache_put(qc->qc_cache, (void *)addr);
                      return;
              }
      #endif /* defined(QCACHE) */
      
              vmem_xfree(vm, addr, size);
      }
      
      void
      vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
      {
              bt_t *bt;
              bt_t *t;
              LIST_HEAD(, vmem_btag) tofree;
      
              LIST_INIT(&tofree);
      
              KASSERT(size > 0);
      
              VMEM_LOCK(vm);
      
              bt = bt_lookupbusy(vm, addr);
              KASSERT(bt != NULL);
              KASSERT(bt->bt_start == addr);
              KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
                  bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
              KASSERT(bt->bt_type == BT_TYPE_BUSY);
              bt_rembusy(vm, bt);
              bt->bt_type = BT_TYPE_FREE;
      
              /* coalesce */
              t = TAILQ_NEXT(bt, bt_seglist);
              if (t != NULL && t->bt_type == BT_TYPE_FREE) {
                      KASSERT(BT_END(bt) < t->bt_start);        /* YYY */
                      bt_remfree(vm, t);
                      bt_remseg(vm, t);
                      bt->bt_size += t->bt_size;
                      LIST_INSERT_HEAD(&tofree, t, bt_freelist);
              }
              t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
              if (t != NULL && t->bt_type == BT_TYPE_FREE) {
                      KASSERT(BT_END(t) < bt->bt_start);        /* YYY */
                      bt_remfree(vm, t);
                      bt_remseg(vm, t);
                      bt->bt_size += t->bt_size;
                      bt->bt_start = t->bt_start;
                      LIST_INSERT_HEAD(&tofree, t, bt_freelist);
              }
      
              t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
              KASSERT(t != NULL);
              KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
              if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
                  t->bt_size == bt->bt_size) {
                      vmem_addr_t spanaddr;
                      vmem_size_t spansize;
      
                      KASSERT(t->bt_start == bt->bt_start);
                      spanaddr = bt->bt_start;
                      spansize = bt->bt_size;
                      bt_remseg(vm, bt);
                      LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
                      bt_remseg(vm, t);
                      LIST_INSERT_HEAD(&tofree, t, bt_freelist);
                      vm->vm_size -= spansize;
                      VMEM_CONDVAR_BROADCAST(vm);
                      VMEM_UNLOCK(vm);
                      (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
              } else {
                      bt_insfree(vm, bt);
                      VMEM_CONDVAR_BROADCAST(vm);
                      VMEM_UNLOCK(vm);
              }
      
              while (!LIST_EMPTY(&tofree)) {
                      t = LIST_FIRST(&tofree);
                      LIST_REMOVE(t, bt_freelist);
                      bt_free(vm, t);
              }
      
              bt_freetrim(vm, BT_MAXFREE);
      }
      
      /*
       * vmem_add:
       *
 * => the caller must ensure an appropriate spl if the arena can be
 *    accessed from interrupt context.
       */
      
      int
      vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
      {
      
              return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
      }
      
      /*
 * vmem_size: information about an arena's size
 *
 * => return the free and/or allocated size of the arena, as selected
 *    by typemask
       */
      vmem_size_t
      vmem_size(vmem_t *vm, int typemask)
      {
      
	switch (typemask) {
	case VMEM_ALLOC:
		return vm->vm_inuse;
	case VMEM_FREE:
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		return vm->vm_size;
              default:
                      panic("vmem_size");
              }
      }
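
/*
 * e.g. vmem_size(vm, VMEM_FREE | VMEM_ALLOC) reports the total size of
 * all spans in the arena, while vmem_size(vm, VMEM_FREE) reports how
 * much of that is currently available.
 */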
      
      /* ---- rehash */
      
      #if defined(_KERNEL)
      static struct callout vmem_rehash_ch;
      static int vmem_rehash_interval;
      static struct workqueue *vmem_rehash_wq;
      static struct work vmem_rehash_wk;
      
      static void
      vmem_rehash_all(struct work *wk, void *dummy)
      {
              vmem_t *vm;
      
              KASSERT(wk == &vmem_rehash_wk);
              mutex_enter(&vmem_list_lock);
              LIST_FOREACH(vm, &vmem_list, vm_alllist) {
                      size_t desired;
                      size_t current;
      
                      if (!VMEM_TRYLOCK(vm)) {
                              continue;
                      }
                      desired = vm->vm_nbusytag;
                      current = vm->vm_hashsize;
                      VMEM_UNLOCK(vm);
      
                      if (desired > VMEM_HASHSIZE_MAX) {
                              desired = VMEM_HASHSIZE_MAX;
                      } else if (desired < VMEM_HASHSIZE_MIN) {
                              desired = VMEM_HASHSIZE_MIN;
                      }
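		/*
		 * only rehash when the desired size differs from the
		 * current size by more than a factor of two, to avoid
		 * resizing the hash table too eagerly.
		 */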
                      if (desired > current * 2 || desired * 2 < current) {
                              vmem_rehash(vm, desired, VM_NOSLEEP);
                      }
              }
              mutex_exit(&vmem_list_lock);
      
              callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
      }
      
      static void
      vmem_rehash_all_kick(void *dummy)
      {
      
              workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
      }
      
      void
      vmem_rehash_start(void)
      {
              int error;
      
              error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
                  vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE);
              if (error) {
                      panic("%s: workqueue_create %d\n", __func__, error);
              }
              callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE);
              callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);
      
              vmem_rehash_interval = hz * 10;
              callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
      }
      #endif /* defined(_KERNEL) */
      
      /* ---- debug */
      
      #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY)
      
      static void bt_dump(const bt_t *, void (*)(const char *, ...)
          __printflike(1, 2));
      
      static const char *
      bt_type_string(int type)
      {
              static const char * const table[] = {
                      [BT_TYPE_BUSY] = "busy",
                      [BT_TYPE_FREE] = "free",
                      [BT_TYPE_SPAN] = "span",
                      [BT_TYPE_SPAN_STATIC] = "static span",
              };
      
              if (type >= __arraycount(table)) {
                      return "BOGUS";
              }
              return table[type];
      }
      
      static void
      bt_dump(const bt_t *bt, void (*pr)(const char *, ...))
      {
      
              (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n",
                  bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
                  bt->bt_type, bt_type_string(bt->bt_type));
      }
      
      static void
      vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...) __printflike(1, 2))
      {
              const bt_t *bt;
              int i;
      
              (*pr)("vmem %p '%s'\n", vm, vm->vm_name);
              TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                      bt_dump(bt, pr);
              }
      
              for (i = 0; i < VMEM_MAXORDER; i++) {
                      const struct vmem_freelist *fl = &vm->vm_freelist[i];
      
                      if (LIST_EMPTY(fl)) {
                              continue;
                      }
      
                      (*pr)("freelist[%d]\n", i);
                      LIST_FOREACH(bt, fl, bt_freelist) {
                              bt_dump(bt, pr);
                      }
              }
      }
      
      #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */
      
      #if defined(DDB)
      static bt_t *
      vmem_whatis_lookup(vmem_t *vm, uintptr_t addr)
      {
              bt_t *bt;
      
              TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                      if (BT_ISSPAN_P(bt)) {
                              continue;
                      }
                      if (bt->bt_start <= addr && addr <= BT_END(bt)) {
                              return bt;
                      }
              }
      
              return NULL;
      }
      
      void
      vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...))
      {
              vmem_t *vm;
      
              LIST_FOREACH(vm, &vmem_list, vm_alllist) {
                      bt_t *bt;
      
                      bt = vmem_whatis_lookup(vm, addr);
                      if (bt == NULL) {
                              continue;
                      }
                      (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
                          (void *)addr, (void *)bt->bt_start,
                          (size_t)(addr - bt->bt_start), vm->vm_name,
                          (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
              }
      }
      
      void
      vmem_printall(const char *modif, void (*pr)(const char *, ...))
      {
              const vmem_t *vm;
      
              LIST_FOREACH(vm, &vmem_list, vm_alllist) {
                      vmem_dump(vm, pr);
              }
      }
      
      void
      vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...))
      {
              const vmem_t *vm = (const void *)addr;
      
              vmem_dump(vm, pr);
      }
      #endif /* defined(DDB) */
      
      #if defined(_KERNEL)
      #define vmem_printf printf
      #else
      #include <stdio.h>
      #include <stdarg.h>
      
      static void
      vmem_printf(const char *fmt, ...)
      {
              va_list ap;
              va_start(ap, fmt);
              vprintf(fmt, ap);
              va_end(ap);
      }
      #endif
      
      #if defined(VMEM_SANITY)
      
      static bool
      vmem_check_sanity(vmem_t *vm)
      {
              const bt_t *bt, *bt2;
      
              KASSERT(vm != NULL);
      
              TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                      if (bt->bt_start > BT_END(bt)) {
                              printf("corrupted tag\n");
                              bt_dump(bt, vmem_printf);
                              return false;
                      }
              }
              TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
                      TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
                              if (bt == bt2) {
                                      continue;
                              }
                              if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
                                      continue;
                              }
                              if (bt->bt_start <= BT_END(bt2) &&
                                  bt2->bt_start <= BT_END(bt)) {
                                      printf("overwrapped tags\n");
                                      bt_dump(bt, vmem_printf);
                                      bt_dump(bt2, vmem_printf);
                                      return false;
                              }
                      }
              }
      
              return true;
      }
      
      static void
      vmem_check(vmem_t *vm)
      {
      
              if (!vmem_check_sanity(vm)) {
                      panic("insanity vmem %p", vm);
              }
      }
      
      #endif /* defined(VMEM_SANITY) */
      
      #if defined(UNITTEST)
      int
      main(void)
      {
              int rc;
              vmem_t *vm;
              vmem_addr_t p;
              struct reg {
                      vmem_addr_t p;
                      vmem_size_t sz;
                      bool x;
              } *reg = NULL;
              int nreg = 0;
              int nalloc = 0;
              int nfree = 0;
              vmem_size_t total = 0;
      #if 1
              vm_flag_t strat = VM_INSTANTFIT;
      #else
              vm_flag_t strat = VM_BESTFIT;
      #endif
      
              vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP,
      #ifdef _KERNEL
                  IPL_NONE
      #else
                  0
      #endif
                  );
              if (vm == NULL) {
                      printf("vmem_create\n");
                      exit(EXIT_FAILURE);
              }
              vmem_dump(vm, vmem_printf);
      
              rc = vmem_add(vm, 0, 50, VM_SLEEP);
              assert(rc == 0);
              rc = vmem_add(vm, 100, 200, VM_SLEEP);
              assert(rc == 0);
              rc = vmem_add(vm, 2000, 1, VM_SLEEP);
              assert(rc == 0);
              rc = vmem_add(vm, 40000, 65536, VM_SLEEP);
              assert(rc == 0);
              rc = vmem_add(vm, 10000, 10000, VM_SLEEP);
              assert(rc == 0);
              rc = vmem_add(vm, 500, 1000, VM_SLEEP);
              assert(rc == 0);
              rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP);
              assert(rc == 0);
              rc = vmem_xalloc(vm, 0x101, 0, 0, 0,
                  0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
              assert(rc != 0);
              rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p);
              assert(rc == 0 && p == 0);
              vmem_xfree(vm, p, 50);
              rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p);
              assert(rc == 0 && p == 0);
              rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
                  0xffffff01, 0xffffffff, strat|VM_SLEEP, &p);
              assert(rc != 0);
              rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
                  0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p);
              assert(rc != 0);
              rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
                  0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
              assert(rc == 0);
              vmem_dump(vm, vmem_printf);
              for (;;) {
                      struct reg *r;
                      int t = rand() % 100;
      
                      if (t > 45) {
                              /* alloc */
                              vmem_size_t sz = rand() % 500 + 1;
                              bool x;
                              vmem_size_t align, phase, nocross;
                              vmem_addr_t minaddr, maxaddr;
      
                              if (t > 70) {
                                      x = true;
                                      /* XXX */
                                      align = 1 << (rand() % 15);
                                      phase = rand() % 65536;
                                      nocross = 1 << (rand() % 15);
                                      if (align <= phase) {
                                              phase = 0;
                                      }
                                      if (VMEM_CROSS_P(phase, phase + sz - 1,
                                          nocross)) {
                                              nocross = 0;
                                      }
                                      do {
                                              minaddr = rand() % 50000;
                                              maxaddr = rand() % 70000;
                                      } while (minaddr > maxaddr);
                                      printf("=== xalloc %" PRIu64
                                          " align=%" PRIu64 ", phase=%" PRIu64
                                          ", nocross=%" PRIu64 ", min=%" PRIu64
                                          ", max=%" PRIu64 "\n",
                                          (uint64_t)sz,
                                          (uint64_t)align,
                                          (uint64_t)phase,
                                          (uint64_t)nocross,
                                          (uint64_t)minaddr,
                                          (uint64_t)maxaddr);
                                      rc = vmem_xalloc(vm, sz, align, phase, nocross,
                                          minaddr, maxaddr, strat|VM_SLEEP, &p);
                              } else {
                                      x = false;
                                      printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
                                      rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p);
                              }
                              printf("-> %" PRIu64 "\n", (uint64_t)p);
                              vmem_dump(vm, vmem_printf);
                              if (rc != 0) {
                                      if (x) {
                                              continue;
                                      }
                                      break;
                              }
                              nreg++;
                              reg = realloc(reg, sizeof(*reg) * nreg);
                              r = &reg[nreg - 1];
                              r->p = p;
                              r->sz = sz;
                              r->x = x;
                              total += sz;
                              nalloc++;
                      } else if (nreg != 0) {
                              /* free */
                              r = &reg[rand() % nreg];
                              printf("=== free %" PRIu64 ", %" PRIu64 "\n",
                                  (uint64_t)r->p, (uint64_t)r->sz);
                              if (r->x) {
                                      vmem_xfree(vm, r->p, r->sz);
                              } else {
                                      vmem_free(vm, r->p, r->sz);
                              }
                              total -= r->sz;
                              vmem_dump(vm, vmem_printf);
                              *r = reg[nreg - 1];
                              nreg--;
                              nfree++;
                      }
                      printf("total=%" PRIu64 "\n", (uint64_t)total);
              }
              fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
                  (uint64_t)total, nalloc, nfree);
              exit(EXIT_SUCCESS);
      }
      #endif /* defined(UNITTEST) */
      /*        $NetBSD: kern_lock.c,v 1.163 2019/05/09 05:00:31 ozaki-r Exp $        */
      
      /*-
       * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
       * NASA Ames Research Center, and by Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.163 2019/05/09 05:00:31 ozaki-r Exp $");
      
      #include <sys/param.h>
      #include <sys/proc.h>
      #include <sys/lock.h>
      #include <sys/systm.h>
      #include <sys/kernel.h>
      #include <sys/lockdebug.h>
      #include <sys/cpu.h>
      #include <sys/syslog.h>
      #include <sys/atomic.h>
      #include <sys/lwp.h>
      #include <sys/pserialize.h>
      
      #include <machine/lock.h>
      
      #include <dev/lockstat.h>
      
      #define        RETURN_ADDRESS        (uintptr_t)__builtin_return_address(0)
      
      bool        kernel_lock_dodebug;
      
      __cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)]
          __cacheline_aligned;
      
      void
      assert_sleepable(void)
      {
              const char *reason;
              uint64_t pctr;
              bool idle;
      
	if (panicstr != NULL) {
                      return;
              }
      
	LOCKDEBUG_BARRIER(kernel_lock, 1);
      
              /*
               * Avoid disabling/re-enabling preemption here since this
               * routine may be called in delicate situations.
               */
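	/*
	 * lwp_pctr() changes whenever this LWP is preempted or migrated;
	 * retry so that the idle check is known to have been made on the
	 * CPU we are still running on.
	 */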
              do {
		pctr = lwp_pctr();
                      idle = CURCPU_IDLE_P();
              } while (pctr != lwp_pctr());
      
              reason = NULL;
	if (idle && !cold &&
                  kcpuset_isset(kcpuset_running, cpu_index(curcpu()))) {
                      reason = "idle";
              }
	if (cpu_intr_p()) {
                      reason = "interrupt";
              }
	if (cpu_softintr_p()) {
                      reason = "softint";
              }
	if (!pserialize_not_in_read_section()) {
                      reason = "pserialize";
              }
      
	if (reason) {
                      panic("%s: %s caller=%p", __func__, reason,
                          (void *)RETURN_ADDRESS);
              }
      }
      
      /*
       * Functions for manipulating the kernel_lock.  We put them here
       * so that they show up in profiles.
       */
      
      #define        _KERNEL_LOCK_ABORT(msg)                                                \
          LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg)
      
      #ifdef LOCKDEBUG
      #define        _KERNEL_LOCK_ASSERT(cond)                                        \
      do {                                                                        \
              if (!(cond))                                                        \
                      _KERNEL_LOCK_ABORT("assertion failed: " #cond);                \
      } while (/* CONSTCOND */ 0)
      #else
      #define        _KERNEL_LOCK_ASSERT(cond)        /* nothing */
      #endif
      
      static void        _kernel_lock_dump(const volatile void *, lockop_printer_t);
      
      lockops_t _kernel_lock_ops = {
              .lo_name = "Kernel lock",
              .lo_type = LOCKOPS_SPIN,
              .lo_dump = _kernel_lock_dump,
      };
      
      /*
       * Initialize the kernel lock.
       */
      void
      kernel_lock_init(void)
      {
      
              __cpu_simple_lock_init(kernel_lock);
              kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
                  RETURN_ADDRESS);
      }
      CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));
      
      /*
       * Print debugging information about the kernel lock.
       */
      static void
      _kernel_lock_dump(const volatile void *junk, lockop_printer_t pr)
      {
              struct cpu_info *ci = curcpu();
      
              (void)junk;
      
              pr("curcpu holds : %18d wanted by: %#018lx\n",
                  ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
      }
      
      /*
       * Acquire 'nlocks' holds on the kernel lock.
       */
      void
      _kernel_lock(int nlocks)
      {
              struct cpu_info *ci;
              LOCKSTAT_TIMER(spintime);
              LOCKSTAT_FLAG(lsflag);
              struct lwp *owant;
              u_int spins;
              int s;
              struct lwp *l = curlwp;
      
              _KERNEL_LOCK_ASSERT(nlocks > 0);
      
              s = splvm();
              ci = curcpu();
              if (ci->ci_biglock_count != 0) {
                      _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
                      ci->ci_biglock_count += nlocks;
                      l->l_blcnt += nlocks;
                      splx(s);
                      return;
              }
      
              _KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
              LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
                  0);
      
              if (__cpu_simple_lock_try(kernel_lock)) {
                      ci->ci_biglock_count = nlocks;
                      l->l_blcnt = nlocks;
                      LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
                          RETURN_ADDRESS, 0);
                      splx(s);
                      return;
              }
      
              /*
               * To remove the ordering constraint between adaptive mutexes
               * and kernel_lock we must make it appear as if this thread is
               * blocking.  For non-interlocked mutex release, a store fence
               * is required to ensure that the result of any mutex_exit()
               * by the current LWP becomes visible on the bus before the set
               * of ci->ci_biglock_wanted becomes visible.
               */
              membar_producer();
              owant = ci->ci_biglock_wanted;
              ci->ci_biglock_wanted = l;
      
              /*
               * Spin until we acquire the lock.  Once we have it, record the
               * time spent with lockstat.
               */
              LOCKSTAT_ENTER(lsflag);
              LOCKSTAT_START_TIMER(lsflag, spintime);
      
              spins = 0;
              do {
                      splx(s);
                      while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
                              if (SPINLOCK_SPINOUT(spins)) {
                                      extern int start_init_exec;
                                      if (!start_init_exec)
                                              _KERNEL_LOCK_ABORT("spinout");
                              }
                              SPINLOCK_BACKOFF_HOOK;
                              SPINLOCK_SPIN_HOOK;
                      }
                      s = splvm();
              } while (!__cpu_simple_lock_try(kernel_lock));
      
              ci->ci_biglock_count = nlocks;
              l->l_blcnt = nlocks;
              LOCKSTAT_STOP_TIMER(lsflag, spintime);
              LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
                  RETURN_ADDRESS, 0);
              if (owant == NULL) {
                      LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
                          LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
              }
              LOCKSTAT_EXIT(lsflag);
              splx(s);
      
              /*
               * Now that we have kernel_lock, reset ci_biglock_wanted.  This
               * store must be unbuffered (immediately visible on the bus) in
               * order for non-interlocked mutex release to work correctly.
               * It must be visible before a mutex_exit() can execute on this
               * processor.
               *
               * Note: only where CAS is available in hardware will this be
               * an unbuffered write, but non-interlocked release cannot be
               * done on CPUs without CAS in hardware.
               */
              (void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);
      
              /*
               * Issue a memory barrier as we have acquired a lock.  This also
               * prevents stores from a following mutex_exit() being reordered
               * to occur before our store to ci_biglock_wanted above.
               */
              membar_enter();
      }
      
      /*
       * Release 'nlocks' holds on the kernel lock.  If 'nlocks' is zero, release
       * all holds.
       */
      void
      _kernel_unlock(int nlocks, int *countp)
      {
              struct cpu_info *ci;
              u_int olocks;
              int s;
              struct lwp *l = curlwp;
      
              _KERNEL_LOCK_ASSERT(nlocks < 2);
      
              olocks = l->l_blcnt;
      
              if (olocks == 0) {
                      _KERNEL_LOCK_ASSERT(nlocks <= 0);
                      if (countp != NULL)
                              *countp = 0;
                      return;
              }
      
              _KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
      
              if (nlocks == 0)
                      nlocks = olocks;
              else if (nlocks == -1) {
                      nlocks = 1;
                      _KERNEL_LOCK_ASSERT(olocks == 1);
              }
              s = splvm();
              ci = curcpu();
              _KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
              if (ci->ci_biglock_count == nlocks) {
                      LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
                          RETURN_ADDRESS, 0);
                      ci->ci_biglock_count = 0;
                      __cpu_simple_unlock(kernel_lock);
                      l->l_blcnt -= nlocks;
                      splx(s);
                      if (l->l_dopreempt)
                              kpreempt(0);
              } else {
                      ci->ci_biglock_count -= nlocks;
                      l->l_blcnt -= nlocks;
                      splx(s);
              }
      
              if (countp != NULL)
                      *countp = olocks;
      }
      
      bool
      _kernel_locked_p(void)
      {
              return __SIMPLELOCK_LOCKED_P(kernel_lock);
      }
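
/*
 * Usage sketch (illustrative only): callers normally take and drop the
 * big lock through the KERNEL_LOCK()/KERNEL_UNLOCK_ONE() wrappers, e.g.
 * around a call into a non-MPSAFE subsystem:
 *
 *	KERNEL_LOCK(1, NULL);
 *	... call non-MPSAFE code ...
 *	KERNEL_UNLOCK_ONE(NULL);
 */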
      /*        $NetBSD: sysv_shm.c,v 1.135 2019/06/10 00:35:47 chs Exp $        */
      
      /*-
       * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
       * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*
       * Copyright (c) 1994 Adam Glass and Charles M. Hannum.  All rights reserved.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       * 3. All advertising materials mentioning features or use of this software
       *    must display the following acknowledgement:
       *        This product includes software developed by Adam Glass and Charles M.
       *        Hannum.
       * 4. The names of the authors may not be used to endorse or promote products
       *    derived from this software without specific prior written permission.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
       * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
       * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
       * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
       * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
       * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.135 2019/06/10 00:35:47 chs Exp $");
      
      #ifdef _KERNEL_OPT
      #include "opt_sysv.h"
      #endif
      
      #include <sys/param.h>
      #include <sys/kernel.h>
      #include <sys/kmem.h>
      #include <sys/shm.h>
      #include <sys/mutex.h>
      #include <sys/mman.h>
      #include <sys/stat.h>
      #include <sys/sysctl.h>
      #include <sys/mount.h>                /* XXX for <sys/syscallargs.h> */
      #include <sys/syscallargs.h>
      #include <sys/queue.h>
      #include <sys/kauth.h>
      
      #include <uvm/uvm_extern.h>
      #include <uvm/uvm_object.h>
      
      struct shmmap_entry {
              SLIST_ENTRY(shmmap_entry) next;
              vaddr_t va;
              int shmid;
      };
      
      int                        shm_nused                __cacheline_aligned;
      struct shmid_ds *        shmsegs                        __read_mostly;
      
      static kmutex_t                shm_lock                __cacheline_aligned;
      static kcondvar_t *        shm_cv                        __cacheline_aligned;
      static int                shm_last_free                __cacheline_aligned;
      static size_t                shm_committed                __cacheline_aligned;
      static int                shm_use_phys                __read_mostly;
      
      static kcondvar_t        shm_realloc_cv;
      static bool                shm_realloc_state;
      static u_int                shm_realloc_disable;
      
      struct shmmap_state {
              unsigned int nitems;
              unsigned int nrefs;
              SLIST_HEAD(, shmmap_entry) entries;
      };
      
      extern int kern_has_sysvshm;
      
      SYSCTL_SETUP_PROTO(sysctl_ipc_shm_setup);
      
      #ifdef SHMDEBUG
      #define SHMPRINTF(a) printf a
      #else
      #define SHMPRINTF(a)
      #endif
      
      static int shmrealloc(int);
      
      /*
       * Find the shared memory segment by the identifier.
       *  => must be called with shm_lock held;
       */
      static struct shmid_ds *
      shm_find_segment_by_shmid(int shmid)
      {
              int segnum;
              struct shmid_ds *shmseg;
      
              KASSERT(mutex_owned(&shm_lock));
      
              segnum = IPCID_TO_IX(shmid);
              if (segnum < 0 || segnum >= shminfo.shmmni)
                      return NULL;
              shmseg = &shmsegs[segnum];
              if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
                      return NULL;
              if ((shmseg->shm_perm.mode &
                  (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
                      return NULL;
              if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
                      return NULL;
      
              return shmseg;
      }
      
      /*
       * Free memory segment.
       *  => must be called with shm_lock held;
       */
      static void
      shm_free_segment(int segnum)
      {
              struct shmid_ds *shmseg;
              size_t size;
              bool wanted;
      
              KASSERT(mutex_owned(&shm_lock));
      
              shmseg = &shmsegs[segnum];
              SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
                  shmseg->shm_perm._key, shmseg->shm_perm._seq));
      
              size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
              wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);
      
              shmseg->_shm_internal = NULL;
              shm_committed -= btoc(size);
              shm_nused--;
              shmseg->shm_perm.mode = SHMSEG_FREE;
              shm_last_free = segnum;
        if (wanted)
                      cv_broadcast(&shm_cv[segnum]);
      }
      
      /*
       * Delete entry from the shm map.
       *  => must be called with shm_lock held;
       */
      static struct uvm_object *
      shm_delete_mapping(struct shmmap_state *shmmap_s,
          struct shmmap_entry *shmmap_se)
      {
              struct uvm_object *uobj = NULL;
              struct shmid_ds *shmseg;
              int segnum;
      
              KASSERT(mutex_owned(&shm_lock));
      
              segnum = IPCID_TO_IX(shmmap_se->shmid);
              shmseg = &shmsegs[segnum];
              SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
              shmmap_s->nitems--;
              shmseg->shm_dtime = time_second;
              if ((--shmseg->shm_nattch <= 0) &&
                  (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
                      uobj = shmseg->_shm_internal;
                      shm_free_segment(segnum);
              }
      
              return uobj;
      }
      
      /*
 * Get a non-shared shm map for this vmspace.  Note that memory
 * allocation may be performed with the lock held.
       */
      static struct shmmap_state *
      shmmap_getprivate(struct proc *p)
      {
              struct shmmap_state *oshmmap_s, *shmmap_s;
              struct shmmap_entry *oshmmap_se, *shmmap_se;
      
              KASSERT(mutex_owned(&shm_lock));
      
        /* 1. A shm map with refcnt == 1 is used only by us - return it */
              oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
              if (oshmmap_s && oshmmap_s->nrefs == 1)
                      return oshmmap_s;
      
        /* 2. No shm map present - create a fresh one */
              shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
              shmmap_s->nrefs = 1;
              SLIST_INIT(&shmmap_s->entries);
              p->p_vmspace->vm_shm = (void *)shmmap_s;
      
              if (oshmmap_s == NULL)
                      return shmmap_s;
      
              SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
                  p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));
      
              /* 3. A shared shm map, copy to a fresh one and adjust refcounts */
              SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
                      shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
                      shmmap_se->va = oshmmap_se->va;
                      shmmap_se->shmid = oshmmap_se->shmid;
                      SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
              }
              shmmap_s->nitems = oshmmap_s->nitems;
              oshmmap_s->nrefs--;
      
              return shmmap_s;
      }
      
      /*
       * Lock/unlock the memory.
       *  => must be called with shm_lock held;
       */
      static int
      shm_memlock(struct shmid_ds *shmseg, int shmid, int cmd)
      {
              size_t size;
              int error;
      
              KASSERT(mutex_owned(&shm_lock));
      
              size = round_page(shmseg->shm_segsz);
      
              if (cmd == SHM_LOCK && (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
                      /* Wire the object and map, then tag it */
                      error = uvm_obj_wirepages(shmseg->_shm_internal,
                          0, size, NULL);
                      if (error)
                              return EIO;
                      shmseg->shm_perm.mode |= SHMSEG_WIRED;
      
              } else if (cmd == SHM_UNLOCK &&
                  (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
                      /* Unwire the object, then untag it */
                      uvm_obj_unwirepages(shmseg->_shm_internal, 0, size);
                      shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
              }
      
              return 0;
      }
      
      /*
       * Unmap shared memory.
       */
      int
      sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(const void *) shmaddr;
              } */
              struct proc *p = l->l_proc;
              struct shmmap_state *shmmap_s1, *shmmap_s;
              struct shmmap_entry *shmmap_se;
              struct uvm_object *uobj;
              struct shmid_ds *shmseg;
              size_t size;
      
              mutex_enter(&shm_lock);
              /* In case of reallocation, we will wait for completion */
              while (__predict_false(shm_realloc_state))
                      cv_wait(&shm_realloc_cv, &shm_lock);
      
              shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
              if (shmmap_s1 == NULL) {
                      mutex_exit(&shm_lock);
                      return EINVAL;
              }
      
              /* Find the map entry */
              SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
                      if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
                              break;
              if (shmmap_se == NULL) {
                      mutex_exit(&shm_lock);
                      return EINVAL;
              }
      
              shmmap_s = shmmap_getprivate(p);
              if (shmmap_s != shmmap_s1) {
                      /* Map has been copied, lookup entry in new map */
                      SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
                              if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
                                      break;
                      if (shmmap_se == NULL) {
                              mutex_exit(&shm_lock);
                              return EINVAL;
                      }
              }
      
              SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
                  p->p_vmspace, shmmap_se->shmid, shmmap_se->va));
      
              /* Delete the entry from shm map */
              uobj = shm_delete_mapping(shmmap_s, shmmap_se);
              shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
              size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
              mutex_exit(&shm_lock);
      
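        /*
         * Unmap the region and, if the segment was freed, drop the final
         * object reference; both are done without holding shm_lock.
         */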
              uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
              if (uobj != NULL) {
                      uao_detach(uobj);
              }
              kmem_free(shmmap_se, sizeof(struct shmmap_entry));
      
              return 0;
      }
      
      /*
       * Map shared memory.
       */
      int
      sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(int) shmid;
                      syscallarg(const void *) shmaddr;
                      syscallarg(int) shmflg;
              } */
              int error, flags = 0;
              struct proc *p = l->l_proc;
              kauth_cred_t cred = l->l_cred;
              struct shmid_ds *shmseg;
              struct shmmap_state *shmmap_s;
              struct shmmap_entry *shmmap_se;
              struct uvm_object *uobj;
              struct vmspace *vm;
              vaddr_t attach_va;
              vm_prot_t prot;
              vsize_t size;
      
              /* Allocate a new map entry and set it */
              shmmap_se = kmem_alloc(sizeof(struct shmmap_entry), KM_SLEEP);
              shmmap_se->shmid = SCARG(uap, shmid);
      
              mutex_enter(&shm_lock);
              /* In case of reallocation, we will wait for completion */
              while (__predict_false(shm_realloc_state))
                      cv_wait(&shm_realloc_cv, &shm_lock);
      
              shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
              if (shmseg == NULL) {
                      error = EINVAL;
                      goto err;
              }
              error = ipcperm(cred, &shmseg->shm_perm,
                  (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
              if (error)
                      goto err;
      
              vm = p->p_vmspace;
              shmmap_s = (struct shmmap_state *)vm->vm_shm;
              if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
                      error = EMFILE;
                      goto err;
              }
      
              size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
              prot = VM_PROT_READ;
              if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
                      prot |= VM_PROT_WRITE;
              if (SCARG(uap, shmaddr)) {
                      flags |= UVM_FLAG_FIXED;
                      if (SCARG(uap, shmflg) & SHM_RND)
                              attach_va =
                                  (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
                      else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
                              attach_va = (vaddr_t)SCARG(uap, shmaddr);
                      else {
                              error = EINVAL;
                              goto err;
                      }
              } else {
                      /* This is just a hint to uvm_map() about where to put it. */
                      attach_va = p->p_emul->e_vm_default_addr(p,
                          (vaddr_t)vm->vm_daddr, size,
                          p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
              }
      
        /*
         * Create a map entry, add it to the list and increase the counters.
         * Since the lock will be dropped before the mapping, disable
         * reallocation.
         */
              shmmap_s = shmmap_getprivate(p);
              SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
              shmmap_s->nitems++;
              shmseg->shm_lpid = p->p_pid;
              shmseg->shm_nattch++;
              shm_realloc_disable++;
              mutex_exit(&shm_lock);
      
              /*
               * Add a reference to the memory object, map it to the
               * address space, and lock the memory, if needed.
               */
              uobj = shmseg->_shm_internal;
              uao_reference(uobj);
              error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
                  UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
              if (error)
                      goto err_detach;
      
              /* Set the new address, and update the time */
              mutex_enter(&shm_lock);
              shmmap_se->va = attach_va;
              shmseg->shm_atime = time_second;
              shm_realloc_disable--;
              retval[0] = attach_va;
              SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
                  p->p_vmspace, shmmap_se->shmid, attach_va));
      err:
              cv_broadcast(&shm_realloc_cv);
              mutex_exit(&shm_lock);
              if (error && shmmap_se) {
                      kmem_free(shmmap_se, sizeof(struct shmmap_entry));
              }
              return error;
      
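/* uvm_map() failed: undo the attach accounting and drop the references. */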
      err_detach:
              uao_detach(uobj);
              mutex_enter(&shm_lock);
              uobj = shm_delete_mapping(shmmap_s, shmmap_se);
              shm_realloc_disable--;
              cv_broadcast(&shm_realloc_cv);
              mutex_exit(&shm_lock);
              if (uobj != NULL) {
                      uao_detach(uobj);
              }
              kmem_free(shmmap_se, sizeof(struct shmmap_entry));
              return error;
      }
      
      /*
       * Shared memory control operations.
       */
      int
      sys___shmctl50(struct lwp *l, const struct sys___shmctl50_args *uap,
          register_t *retval)
      {
              /* {
                      syscallarg(int) shmid;
                      syscallarg(int) cmd;
                      syscallarg(struct shmid_ds *) buf;
              } */
              struct shmid_ds shmbuf;
              int cmd, error;
      
              cmd = SCARG(uap, cmd);
              if (cmd == IPC_SET) {
                      error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
                      if (error)
                              return error;
              }
      
              error = shmctl1(l, SCARG(uap, shmid), cmd,
                  (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);
      
              if (error == 0 && cmd == IPC_STAT)
                      error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));
      
              return error;
      }
      
      int
      shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
      {
              struct uvm_object *uobj = NULL;
              kauth_cred_t cred = l->l_cred;
              struct shmid_ds *shmseg;
              int error = 0;
      
              mutex_enter(&shm_lock);
              /* In case of reallocation, we will wait for completion */
              while (__predict_false(shm_realloc_state))
                      cv_wait(&shm_realloc_cv, &shm_lock);
      
              shmseg = shm_find_segment_by_shmid(shmid);
              if (shmseg == NULL) {
                      mutex_exit(&shm_lock);
                      return EINVAL;
              }
      
              switch (cmd) {
              case IPC_STAT:
                      if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
                              break;
                      memset(shmbuf, 0, sizeof *shmbuf);
                      shmbuf->shm_perm = shmseg->shm_perm;
                      shmbuf->shm_perm.mode &= 0777;
                      shmbuf->shm_segsz = shmseg->shm_segsz;
                      shmbuf->shm_lpid = shmseg->shm_lpid;
                      shmbuf->shm_cpid = shmseg->shm_cpid;
                      shmbuf->shm_nattch = shmseg->shm_nattch;
                      shmbuf->shm_atime = shmseg->shm_atime;
                      shmbuf->shm_dtime = shmseg->shm_dtime;
                      shmbuf->shm_ctime = shmseg->shm_ctime;
                      break;
              case IPC_SET:
                      if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
                              break;
                      shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
                      shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
                      shmseg->shm_perm.mode =
                          (shmseg->shm_perm.mode & ~ACCESSPERMS) |
                          (shmbuf->shm_perm.mode & ACCESSPERMS);
                      shmseg->shm_ctime = time_second;
                      break;
              case IPC_RMID:
                      if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
                              break;
                      shmseg->shm_perm._key = IPC_PRIVATE;
                      shmseg->shm_perm.mode |= SHMSEG_REMOVED;
                      if (shmseg->shm_nattch <= 0) {
                              uobj = shmseg->_shm_internal;
                              shm_free_segment(IPCID_TO_IX(shmid));
                      }
                      break;
              case SHM_LOCK:
              case SHM_UNLOCK:
                      if ((error = kauth_authorize_system(cred,
                          KAUTH_SYSTEM_SYSVIPC,
                          (cmd == SHM_LOCK) ? KAUTH_REQ_SYSTEM_SYSVIPC_SHM_LOCK :
                          KAUTH_REQ_SYSTEM_SYSVIPC_SHM_UNLOCK, NULL, NULL, NULL)) != 0)
                              break;
                      error = shm_memlock(shmseg, shmid, cmd);
                      break;
              default:
                      error = EINVAL;
              }
      
              mutex_exit(&shm_lock);
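        /* If IPC_RMID freed the segment above, drop its final object reference. */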
              if (uobj != NULL)
                      uao_detach(uobj);
              return error;
      }
      
      /*
       * Try to take an already existing segment.
       *  => must be called with shm_lock held;
       *  => called from one place, thus, inline;
       */
      static inline int
      shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
          register_t *retval)
      {
              struct shmid_ds *shmseg;
              kauth_cred_t cred = l->l_cred;
              int segnum, error;
      again:
              KASSERT(mutex_owned(&shm_lock));
      
              /* Find segment by key */
              for (segnum = 0; segnum < shminfo.shmmni; segnum++)
                      if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
                          shmsegs[segnum].shm_perm._key == SCARG(uap, key))
                              break;
              if (segnum == shminfo.shmmni) {
                      /* Not found */
                      return -1;
              }
      
              shmseg = &shmsegs[segnum];
              if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
                      /*
                       * This segment is in the process of being allocated.  Wait
                       * until it's done, and look the key up again (in case the
                       * allocation failed or it was freed).
                       */
                      shmseg->shm_perm.mode |= SHMSEG_WANTED;
                      error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
                      if (error)
                              return error;
                      goto again;
              }
      
              /*
               * First check the flags, to generate a useful error when a
               * segment already exists.
               */
              if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
                  (IPC_CREAT | IPC_EXCL))
                      return EEXIST;
      
              /* Check the permission and segment size. */
              error = ipcperm(cred, &shmseg->shm_perm, mode);
              if (error)
                      return error;
              if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
                      return EINVAL;
      
              *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
              return 0;
      }
      
      int
      sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(key_t) key;
                      syscallarg(size_t) size;
                      syscallarg(int) shmflg;
              } */
              struct shmid_ds *shmseg;
        kauth_cred_t cred = l->l_cred;
              key_t key = SCARG(uap, key);
              size_t size;
              int error, mode, segnum;
              bool lockmem;
      
              mode = SCARG(uap, shmflg) & ACCESSPERMS;
              if (SCARG(uap, shmflg) & _SHM_RMLINGER)
                      mode |= SHMSEG_RMLINGER;
      
              SHMPRINTF(("shmget: key 0x%lx size 0x%zx shmflg 0x%x mode 0x%x\n",
                  SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));
      
        mutex_enter(&shm_lock);
              /* In case of reallocation, we will wait for completion */
              while (__predict_false(shm_realloc_state))
                      cv_wait(&shm_realloc_cv, &shm_lock);
      
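        /*
         * For a named key, try to find an existing segment first;
         * shmget_existing() returns -1 if the key is not in use.
         */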
        if (key != IPC_PRIVATE) {
                      error = shmget_existing(l, uap, mode, retval);
                      if (error != -1) {
                              mutex_exit(&shm_lock);
                              return error;
                      }
                      if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
                              mutex_exit(&shm_lock);
                              return ENOENT;
                      }
              }
              error = 0;
      
              /*
         * Check the limits.
               */
        size = SCARG(uap, size);
        if (size < shminfo.shmmin || size > shminfo.shmmax) {
                      mutex_exit(&shm_lock);
                return EINVAL;
              }
        if (shm_nused >= shminfo.shmmni) {
                      mutex_exit(&shm_lock);
                      return ENOSPC;
              }
        size = round_page(size);
              if (shm_committed + btoc(size) > shminfo.shmall) {
                      mutex_exit(&shm_lock);
                      return ENOMEM;
              }
      
              /* Find the first available segment */
        if (shm_last_free < 0) {
                for (segnum = 0; segnum < shminfo.shmmni; segnum++)
                        if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
                                      break;
                      KASSERT(segnum < shminfo.shmmni);
              } else {
                      segnum = shm_last_free;
                      shm_last_free = -1;
              }
      
        /*
         * Initialize the segment.
         * We will drop the lock while allocating the memory, so mark the
         * segment as allocated but removed, so that no other thread can
         * take it.  Also, disable reallocation while the lock is dropped.
         */
              shmseg = &shmsegs[segnum];
        shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
              shm_committed += btoc(size);
              shm_nused++;
              lockmem = shm_use_phys;
              shm_realloc_disable++;
              mutex_exit(&shm_lock);
      
              /* Allocate the memory object and lock it if needed */
              shmseg->_shm_internal = uao_create(size, 0);
              if (lockmem) {
                      /* Wire the pages and tag it */
                      error = uvm_obj_wirepages(shmseg->_shm_internal, 0, size, NULL);
                      if (error) {
                              uao_detach(shmseg->_shm_internal);
                              mutex_enter(&shm_lock);
                              shm_free_segment(segnum);
                              shm_realloc_disable--;
                              mutex_exit(&shm_lock);
                              return error;
                      }
              }
      
        /*
         * Note: while the segment is marked, there is no need to hold the
         * lock while initializing it (except for shm_perm.mode).
         */
        shmseg->shm_perm._key = SCARG(uap, key);
              shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
              *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
      
              shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
              shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
              shmseg->shm_segsz = SCARG(uap, size);
              shmseg->shm_cpid = l->l_proc->p_pid;
              shmseg->shm_lpid = shmseg->shm_nattch = 0;
              shmseg->shm_atime = shmseg->shm_dtime = 0;
              shmseg->shm_ctime = time_second;
      
        /*
         * The segment is initialized.
         * Take the lock, mark it as allocated, and notify waiters (if any).
         * Also, re-enable reallocation.
         */
              mutex_enter(&shm_lock);
              shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
                  (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
            SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
              if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
                      shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
                      cv_broadcast(&shm_cv[segnum]);
              }
        shm_realloc_disable--;
              cv_broadcast(&shm_realloc_cv);
              mutex_exit(&shm_lock);
      
              return error;
      }
      
      void
      shmfork(struct vmspace *vm1, struct vmspace *vm2)
      {
              struct shmmap_state *shmmap_s;
              struct shmmap_entry *shmmap_se;
      
              SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
              mutex_enter(&shm_lock);
              vm2->vm_shm = vm1->vm_shm;
              if (vm1->vm_shm) {
                      shmmap_s = (struct shmmap_state *)vm1->vm_shm;
                      SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
                              shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
                      shmmap_s->nrefs++;
              }
              mutex_exit(&shm_lock);
      }
      
      void
      shmexit(struct vmspace *vm)
      {
              struct shmmap_state *shmmap_s;
              struct shmmap_entry *shmmap_se;
      
              mutex_enter(&shm_lock);
              shmmap_s = (struct shmmap_state *)vm->vm_shm;
              if (shmmap_s == NULL) {
                      mutex_exit(&shm_lock);
                      return;
              }
              vm->vm_shm = NULL;
      
              if (--shmmap_s->nrefs > 0) {
                      SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
                          vm, shmmap_s->nitems, shmmap_s->nrefs));
                      SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
                              shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
                      }
                      mutex_exit(&shm_lock);
                      return;
              }
      
              SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, shmmap_s->nitems));
              if (shmmap_s->nitems == 0) {
                      mutex_exit(&shm_lock);
                      kmem_free(shmmap_s, sizeof(struct shmmap_state));
                      return;
              }
      
              /*
         * Delete each entry from the shm map, dropping the lock to unmap it.
               */
              for (;;) {
                      struct shmid_ds *shmseg;
                      struct uvm_object *uobj;
                      size_t sz;
      
                      shmmap_se = SLIST_FIRST(&shmmap_s->entries);
                      KASSERT(shmmap_se != NULL);
      
                      shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
                      sz = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
                      /* shm_delete_mapping() removes from the list. */
                      uobj = shm_delete_mapping(shmmap_s, shmmap_se);
                      mutex_exit(&shm_lock);
      
                      uvm_deallocate(&vm->vm_map, shmmap_se->va, sz);
                      if (uobj != NULL) {
                              uao_detach(uobj);
                      }
                      kmem_free(shmmap_se, sizeof(struct shmmap_entry));
      
                      if (SLIST_EMPTY(&shmmap_s->entries)) {
                              break;
                      }
                      mutex_enter(&shm_lock);
                      KASSERT(!SLIST_EMPTY(&shmmap_s->entries));
              }
              kmem_free(shmmap_s, sizeof(struct shmmap_state));
      }
      
      static int
      shmrealloc(int newshmni)
      {
              vaddr_t v;
              struct shmid_ds *oldshmsegs, *newshmsegs;
              kcondvar_t *newshm_cv, *oldshm_cv;
              size_t sz;
              int i, lsegid, oldshmni;
      
              if (newshmni < 1)
                      return EINVAL;
      
              /* Allocate new memory area */
              sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
                  ALIGN(newshmni * sizeof(kcondvar_t));
              sz = round_page(sz);
              v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
              if (v == 0)
                      return ENOMEM;
      
              mutex_enter(&shm_lock);
              while (shm_realloc_state || shm_realloc_disable)
                      cv_wait(&shm_realloc_cv, &shm_lock);
      
        /*
         * Find the index of the highest segment in use.  Fail if it
         * would not fit into the smaller new array.
         */
              lsegid = 0;
              for (i = 0; i < shminfo.shmmni; i++)
                      if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
                              lsegid = i;
              if (lsegid >= newshmni) {
                      mutex_exit(&shm_lock);
                      uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
                      return EBUSY;
              }
              shm_realloc_state = true;
      
              newshmsegs = (void *)v;
              newshm_cv = (void *)((uintptr_t)newshmsegs +
                  ALIGN(newshmni * sizeof(struct shmid_ds)));
      
        /* Copy the existing segment structures to the new area */
              for (i = 0; i < shm_nused; i++) {
                      cv_init(&newshm_cv[i], "shmwait");
                      (void)memcpy(&newshmsegs[i], &shmsegs[i],
                          sizeof(newshmsegs[0]));
              }
      
        /* Mark all new segments as free, if there are any */
              for (; i < newshmni; i++) {
                      cv_init(&newshm_cv[i], "shmwait");
                      newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
                      newshmsegs[i].shm_perm._seq = 0;
              }
      
              oldshmsegs = shmsegs;
              oldshmni = shminfo.shmmni;
              shminfo.shmmni = newshmni;
              shmsegs = newshmsegs;
              shm_cv = newshm_cv;
      
              /* Reallocation completed - notify all waiters, if any */
              shm_realloc_state = false;
              cv_broadcast(&shm_realloc_cv);
              mutex_exit(&shm_lock);
      
              /* Release now unused resources. */
              oldshm_cv = (void *)((uintptr_t)oldshmsegs +
                  ALIGN(oldshmni * sizeof(struct shmid_ds)));
              for (i = 0; i < oldshmni; i++)
                      cv_destroy(&oldshm_cv[i]);
      
              sz = ALIGN(oldshmni * sizeof(struct shmid_ds)) +
                  ALIGN(oldshmni * sizeof(kcondvar_t));
              sz = round_page(sz);
              uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);
      
              return 0;
      }
      
      int
      shminit(struct sysctllog **clog)
      {
              vaddr_t v;
              size_t sz;
              int i;
      
              mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
              cv_init(&shm_realloc_cv, "shmrealc");
      
              /* Allocate the wired memory for our structures */
              sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
                  ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
              sz = round_page(sz);
              v = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_WIRED|UVM_KMF_ZERO);
              if (v == 0) {
                      printf("sysv_shm: cannot allocate memory");
                      return ENOMEM;
              }
              shmsegs = (void *)v;
              shm_cv = (void *)((uintptr_t)shmsegs +
                  ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)));
      
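        /*
         * shminfo.shmmax is given in pages at configuration time; convert
         * it to bytes here, defaulting to a quarter of physical memory
         * (but at least 1024 pages).
         */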
              if (shminfo.shmmax == 0)
                      shminfo.shmmax = uimax(physmem / 4, 1024) * PAGE_SIZE;
              else
                      shminfo.shmmax *= PAGE_SIZE;
              shminfo.shmall = shminfo.shmmax / PAGE_SIZE;
      
              for (i = 0; i < shminfo.shmmni; i++) {
                      cv_init(&shm_cv[i], "shmwait");
                      shmsegs[i].shm_perm.mode = SHMSEG_FREE;
                      shmsegs[i].shm_perm._seq = 0;
              }
              shm_last_free = 0;
              shm_nused = 0;
              shm_committed = 0;
              shm_realloc_disable = 0;
              shm_realloc_state = false;
      
              kern_has_sysvshm = 1;
      
              /* Load the callback function pointers for the uvm subsystem */
              uvm_shmexit = shmexit;
              uvm_shmfork = shmfork;
      
      #ifdef _MODULE
              if (clog)
                      sysctl_ipc_shm_setup(clog);
      #endif
              return 0;
      }
      
      int
      shmfini(void)
      {
              size_t sz;
              int i;
              vaddr_t v = (vaddr_t)shmsegs;
      
              mutex_enter(&shm_lock);
              if (shm_nused) {
                      mutex_exit(&shm_lock);
                      return 1;
              }
      
              /* Clear the callback function pointers for the uvm subsystem */
              uvm_shmexit = NULL;
              uvm_shmfork = NULL;
      
              /* Destroy all condvars */
              for (i = 0; i < shminfo.shmmni; i++)
                      cv_destroy(&shm_cv[i]);
              cv_destroy(&shm_realloc_cv);
      
              /* Free the allocated/wired memory */
              sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
                  ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
              sz = round_page(sz);
              uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
      
              /* Release and destroy our mutex */
              mutex_exit(&shm_lock);
              mutex_destroy(&shm_lock);
      
              kern_has_sysvshm = 0;
      
              return 0;
      }
      
      static int
      sysctl_ipc_shmmni(SYSCTLFN_ARGS)
      {
              int newsize, error;
              struct sysctlnode node;
              node = *rnode;
              node.sysctl_data = &newsize;
      
              newsize = shminfo.shmmni;
              error = sysctl_lookup(SYSCTLFN_CALL(&node));
              if (error || newp == NULL)
                      return error;
      
              sysctl_unlock();
              error = shmrealloc(newsize);
              sysctl_relock();
              return error;
      }
      
      static int
      sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
      {
              uint32_t newsize;
              int error;
              struct sysctlnode node;
              node = *rnode;
              node.sysctl_data = &newsize;
      
              newsize = shminfo.shmall;
              error = sysctl_lookup(SYSCTLFN_CALL(&node));
              if (error || newp == NULL)
                      return error;
      
              if (newsize < 1)
                      return EINVAL;
      
              shminfo.shmall = newsize;
              shminfo.shmmax = (uint64_t)shminfo.shmall * PAGE_SIZE;
      
              return 0;
      }
      
      static int
      sysctl_ipc_shmmax(SYSCTLFN_ARGS)
      {
              uint64_t newsize;
              int error;
              struct sysctlnode node;
              node = *rnode;
              node.sysctl_data = &newsize;
      
              newsize = shminfo.shmmax;
              error = sysctl_lookup(SYSCTLFN_CALL(&node));
              if (error || newp == NULL)
                      return error;
      
              if (newsize < PAGE_SIZE)
                      return EINVAL;
      
              shminfo.shmmax = round_page(newsize);
              shminfo.shmall = shminfo.shmmax >> PAGE_SHIFT;
      
              return 0;
      }
      
      SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
      {
      
              sysctl_createv(clog, 0, NULL, NULL,
                      CTLFLAG_PERMANENT,
                      CTLTYPE_NODE, "ipc",
                      SYSCTL_DESCR("SysV IPC options"),
                      NULL, 0, NULL, 0,
                      CTL_KERN, KERN_SYSVIPC, CTL_EOL);
              sysctl_createv(clog, 0, NULL, NULL,
                      CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                      CTLTYPE_QUAD, "shmmax",
                      SYSCTL_DESCR("Max shared memory segment size in bytes"),
                      sysctl_ipc_shmmax, 0, &shminfo.shmmax, 0,
                      CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
              sysctl_createv(clog, 0, NULL, NULL,
                      CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                      CTLTYPE_INT, "shmmni",
                      SYSCTL_DESCR("Max number of shared memory identifiers"),
                      sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
                      CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
              sysctl_createv(clog, 0, NULL, NULL,
                      CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                      CTLTYPE_INT, "shmseg",
                      SYSCTL_DESCR("Max shared memory segments per process"),
                      NULL, 0, &shminfo.shmseg, 0,
                      CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
              sysctl_createv(clog, 0, NULL, NULL,
                      CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                      CTLTYPE_INT, "shmmaxpgs",
                      SYSCTL_DESCR("Max amount of shared memory in pages"),
                      sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
                      CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
              sysctl_createv(clog, 0, NULL, NULL,
                      CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
                      CTLTYPE_INT, "shm_use_phys",
                      SYSCTL_DESCR("Enable/disable locking of shared memory in "
                          "physical memory"), NULL, 0, &shm_use_phys, 0,
                      CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
      }
      /*        $NetBSD: syscallvar.h,v 1.12 2018/04/19 21:19:07 christos Exp $        */
      
      /*-
       * Copyright (c) 2008 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software developed for The NetBSD Foundation
       * by Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      #ifndef _SYS_SYSCALLVAR_H_
      #define        _SYS_SYSCALLVAR_H_
      
      #ifndef _KERNEL
      #error nothing of interest to userspace here
      #endif
      
      #if defined(_KERNEL) && defined(_KERNEL_OPT)
      #include "opt_dtrace.h"
      #endif
      
      #include <sys/systm.h>
      #include <sys/proc.h>
      
      extern struct emul emul_netbsd;
      
      struct syscall_package {
              u_short                sp_code;
              u_short                sp_flags;
              sy_call_t        *sp_call;
      };
      
      void        syscall_init(void);
      int        syscall_establish(const struct emul *, const struct syscall_package *);
      int        syscall_disestablish(const struct emul *, const struct syscall_package *);
      
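/*
 * Invoke a system call handler, recording the sysent entry on the LWP
 * for the duration of the call.
 */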
      static __inline int
      sy_call(const struct sysent *sy, struct lwp *l, const void *uap,
              register_t *rval)
      {
              int error;
      
        l->l_sysent = sy;
              error = (*sy->sy_call)(l, uap, rval);
        l->l_sysent = NULL;
      
              return error;
      }
      
      static __inline int
      sy_invoke(const struct sysent *sy, struct lwp *l, const void *uap,
              register_t *rval, int code)
      {
              const bool do_trace = l->l_proc->p_trace_enabled &&
                  (sy->sy_flags & SYCALL_INDIRECT) == 0;
              int error;
      
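/*
 * Evaluate the syscall's DTrace probe fields only when the hooks are
 * compiled in; otherwise they collapse to 0.
 */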
      #ifdef KDTRACE_HOOKS
      #define KDTRACE_ENTRY(a)        (a)
      #else
      #define KDTRACE_ENTRY(a)        (0)
      #endif
        if (__predict_true(!(do_trace || KDTRACE_ENTRY(sy->sy_entry)))
                  || (error = trace_enter(code, sy, uap)) == 0) {
                rval[0] = 0;
      #if !defined(__mips__) && !defined(__m68k__)
                      /*
                       * Due to the mips userland code for SYS_break needing v1 to be
                       * preserved, we can't clear this on mips. 
                       */
                      rval[1] = 0;
      #endif
                      error = sy_call(sy, l, uap, rval);
              }
      
        if (__predict_false(do_trace || KDTRACE_ENTRY(sy->sy_return))) {
                      trace_exit(code, sy, uap, rval, error);
              }
              return error;
      }
      
      /* inclusion in the kernel currently depends on SYSCALL_DEBUG */
      extern const char * const syscallnames[];
      extern const char * const altsyscallnames[];
      
      #endif        /* _SYS_SYSCALLVAR_H_ */
      /*        $NetBSD: sys_sched.c,v 1.46 2016/07/30 15:38:17 christos Exp $        */
      
      /*
       * Copyright (c) 2008, 2011 Mindaugas Rasiukevicius <rmind at NetBSD org>
       * All rights reserved.
       * 
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
       * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
       * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       * SUCH DAMAGE.
       */
      
      /*
       * System calls relating to the scheduler.
       *
       * Lock order:
       *
       *        cpu_lock ->
       *            proc_lock ->
       *                proc_t::p_lock ->
       *                    lwp_t::lwp_lock
       *
       * TODO:
       *  - Handle pthread_setschedprio() as defined by POSIX;
       *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.46 2016/07/30 15:38:17 christos Exp $");
      
      #include <sys/param.h>
      
      #include <sys/cpu.h>
      #include <sys/kauth.h>
      #include <sys/kmem.h>
      #include <sys/lwp.h>
      #include <sys/mutex.h>
      #include <sys/proc.h>
      #include <sys/pset.h>
      #include <sys/sched.h>
      #include <sys/syscallargs.h>
      #include <sys/sysctl.h>
      #include <sys/systm.h>
      #include <sys/types.h>
      #include <sys/unistd.h>
      
      static struct sysctllog *sched_sysctl_log;
      static kauth_listener_t sched_listener;
      
      /*
 * Convert a user priority to an in-kernel priority, or convert the
 * current priority to the appropriate range for the new policy.
       */
      static pri_t
      convert_pri(lwp_t *l, int policy, pri_t pri)
      {
      
        /* Convert the user priority to an in-kernel priority */
              if (pri != PRI_NONE) {
                      /* Only for real-time threads */
                      KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
                      KASSERT(policy != SCHED_OTHER);
                      return PRI_USER_RT + pri;
              }
      
              /* Neither policy, nor priority change */
              if (l->l_class == policy)
                      return l->l_priority;
      
              /* Time-sharing -> real-time */
              if (l->l_class == SCHED_OTHER) {
                      KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
                      return PRI_USER_RT;
              }
      
              /* Real-time -> time-sharing */
              if (policy == SCHED_OTHER) {
                      KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
                      /*
                 * This is a bit arbitrary because the priority is dynamic
                       * for SCHED_OTHER threads and will likely be changed by
                       * the scheduler soon anyway.
                       */
                      return l->l_priority - PRI_USER_RT;
              }
      
              /* Real-time -> real-time */
              return l->l_priority;
      }
      
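/*
 * do_sched_setparam:
 *
 * Set the scheduling policy and/or priority for the LWPs of a process,
 * or for a single LWP if a non-zero lid is given.
 */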
      int
      do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
          const struct sched_param *params)
      {
              struct proc *p;
              struct lwp *t;
              pri_t pri;
              u_int lcnt;
              int error;
      
              error = 0;
      
              pri = params->sched_priority;
      
              /* If no parameters specified, just return (this should not happen) */
              if (pri == PRI_NONE && policy == SCHED_NONE)
                      return 0;
      
              /* Validate scheduling class */
              if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
                      return EINVAL;
      
              /* Validate priority */
              if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
                      return EINVAL;
      
              if (pid != 0) {
                      /* Find the process */
                      mutex_enter(proc_lock);
                      p = proc_find(pid);
                      if (p == NULL) {
                              mutex_exit(proc_lock);
                              return ESRCH;
                      }
                      mutex_enter(p->p_lock);
                      mutex_exit(proc_lock);
                      /* Disallow modification of system processes */
                      if ((p->p_flag & PK_SYSTEM) != 0) {
                              mutex_exit(p->p_lock);
                              return EPERM;
                      }
              } else {
                      /* Use the calling process */
                      p = curlwp->l_proc;
                      mutex_enter(p->p_lock);
              }
      
              /* Find the LWP(s) */
              lcnt = 0;
              LIST_FOREACH(t, &p->p_lwps, l_sibling) {
                      pri_t kpri;
                      int lpolicy;
      
                      if (lid && lid != t->l_lid)
                              continue;
      
                      lcnt++;
                      lwp_lock(t);
                      lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
      
                      /* Disallow setting of priority for SCHED_OTHER threads */
                      if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
                              lwp_unlock(t);
                              error = EINVAL;
                              break;
                      }
      
                      /* Convert priority, if needed */
                      kpri = convert_pri(t, lpolicy, pri);
      
                      /* Check the permission */
                      error = kauth_authorize_process(kauth_cred_get(),
                          KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
                          KAUTH_ARG(kpri));
                      if (error) {
                              lwp_unlock(t);
                              break;
                      }
      
                      /* Set the scheduling class, change the priority */
                      t->l_class = lpolicy;
                      lwp_changepri(t, kpri);
                      lwp_unlock(t);
              }
              mutex_exit(p->p_lock);
              return (lcnt == 0) ? ESRCH : error;
      }
      
      /*
       * Set scheduling parameters.
       */
      int
      sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
          register_t *retval)
      {
              /* {
                      syscallarg(pid_t) pid;
                      syscallarg(lwpid_t) lid;
                      syscallarg(int) policy;
                      syscallarg(const struct sched_param *) params;
              } */
              struct sched_param params;
              int error;
      
              /* Get the parameters from the user-space */
              error = copyin(SCARG(uap, params), &params, sizeof(params));
              if (error)
                      goto out;
      
              error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
                  SCARG(uap, policy), &params);
      out:
              return error;
      }
      
      /*
       * do_sched_getparam:
       *
 * If lid is 0, return the parameters of the first LWP in the process.
       */
      int
      do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
          struct sched_param *params)
      {
              struct sched_param lparams;
              struct lwp *t;
              int error, lpolicy;
      
              t = lwp_find2(pid, lid); /* acquire p_lock */
              if (t == NULL)
                      return ESRCH;
      
              /* Check the permission */
              error = kauth_authorize_process(kauth_cred_get(),
                  KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
              if (error != 0) {
                      mutex_exit(t->l_proc->p_lock);
                      return error;
              }
      
              lwp_lock(t);
              lparams.sched_priority = t->l_priority;
              lpolicy = t->l_class;
              lwp_unlock(t);
              mutex_exit(t->l_proc->p_lock);
      
              /*
         * Convert to the user-visible priority value; this is the
         * inverse of convert_pri().
         *
         * The SCHED_OTHER case is a bit arbitrary given that:
         *        - we don't allow setting the priority;
         *        - the priority is dynamic.
               */
              switch (lpolicy) {
              case SCHED_OTHER:
                      lparams.sched_priority -= PRI_USER;
                      break;
              case SCHED_RR:
              case SCHED_FIFO:
                      lparams.sched_priority -= PRI_USER_RT;
                      break;
              }
      
              if (policy != NULL)
                      *policy = lpolicy;
      
              if (params != NULL)
                      *params = lparams;
      
              return error;
      }
      
      /*
       * Get scheduling parameters.
       */
      int
      sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
          register_t *retval)
      {
              /* {
                      syscallarg(pid_t) pid;
                      syscallarg(lwpid_t) lid;
                      syscallarg(int *) policy;
                      syscallarg(struct sched_param *) params;
              } */
              struct sched_param params;
              int error, policy;
      
              error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
                  &params);
              if (error)
                      goto out;
      
              error = copyout(&params, SCARG(uap, params), sizeof(params));
              if (error == 0 && SCARG(uap, policy) != NULL)
                      error = copyout(&policy, SCARG(uap, policy), sizeof(int));
      out:
              return error;
      }
      
      /*
       * Allocate the CPU set, and get it from userspace.
       */
      static int
      genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
      {
              kcpuset_t *kset;
              int error;
      
              kcpuset_create(&kset, true);
              error = kcpuset_copyin(sset, kset, size);
              if (error) {
                      kcpuset_unuse(kset, NULL);
              } else {
                      *dset = kset;
              }
              return error;
      }
      
      /*
       * Set affinity.
       */
      int
      sys__sched_setaffinity(struct lwp *l,
          const struct sys__sched_setaffinity_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(pid_t) pid;
                      syscallarg(lwpid_t) lid;
                      syscallarg(size_t) size;
                      syscallarg(const cpuset_t *) cpuset;
              } */
              kcpuset_t *kcset, *kcpulst = NULL;
              struct cpu_info *ici, *ci;
              struct proc *p;
              struct lwp *t;
              CPU_INFO_ITERATOR cii;
              bool alloff;
              lwpid_t lid;
              u_int lcnt;
              int error;
      
              error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
              if (error)
                      return error;
      
              /*
               * Traverse _each_ CPU to:
               *  - Check that CPUs in the mask have no assigned processor set.
               *  - Check that at least one CPU from the mask is online.
               *  - Find the first target CPU to migrate.
               *
               * To avoid the race with CPU online/offline calls and processor sets,
               * cpu_lock will be locked for the entire operation.
               */
              ci = NULL;
              alloff = false;
              mutex_enter(&cpu_lock);
              for (CPU_INFO_FOREACH(cii, ici)) {
                      struct schedstate_percpu *ispc;
      
                      if (!kcpuset_isset(kcset, cpu_index(ici))) {
                              continue;
                      }
      
                      ispc = &ici->ci_schedstate;
                      /* Check that CPU is not in the processor-set */
                      if (ispc->spc_psid != PS_NONE) {
                              error = EPERM;
                              goto out;
                      }
                      /* Skip offline CPUs */
                      if (ispc->spc_flags & SPCF_OFFLINE) {
                              alloff = true;
                              continue;
                      }
                      /* Target CPU to migrate */
                      if (ci == NULL) {
                              ci = ici;
                      }
              }
              if (ci == NULL) {
                      if (alloff) {
                              /* All CPUs in the set are offline */
                              error = EPERM;
                              goto out;
                      }
                      /* Empty set */
                      kcpuset_unuse(kcset, &kcpulst);
                      kcset = NULL;
              }
      
              if (SCARG(uap, pid) != 0) {
                      /* Find the process */
                      mutex_enter(proc_lock);
                      p = proc_find(SCARG(uap, pid));
                      if (p == NULL) {
                              mutex_exit(proc_lock);
                              error = ESRCH;
                              goto out;
                      }
                      mutex_enter(p->p_lock);
                      mutex_exit(proc_lock);
                      /* Disallow modification of system processes. */
                      if ((p->p_flag & PK_SYSTEM) != 0) {
                              mutex_exit(p->p_lock);
                              error = EPERM;
                              goto out;
                      }
              } else {
                      /* Use the calling process */
                      p = l->l_proc;
                      mutex_enter(p->p_lock);
              }
      
              /*
               * Check the permission.
               */
              error = kauth_authorize_process(l->l_cred,
                  KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
              if (error != 0) {
                      mutex_exit(p->p_lock);
                      goto out;
              }
      
              /* Iterate through LWP(s). */
              lcnt = 0;
              lid = SCARG(uap, lid);
              LIST_FOREACH(t, &p->p_lwps, l_sibling) {
                      if (lid && lid != t->l_lid) {
                              continue;
                      }
                      lwp_lock(t);
                      /* No affinity for zombie LWPs. */
                      if (t->l_stat == LSZOMB) {
                              lwp_unlock(t);
                              continue;
                      }
                      /* First, release existing affinity, if any. */
                      if (t->l_affinity) {
                              kcpuset_unuse(t->l_affinity, &kcpulst);
                      }
                      if (kcset) {
                              /*
                               * Hold a reference on affinity mask, assign mask to
                               * LWP and migrate it to another CPU (unlocks LWP).
                               */
                              kcpuset_use(kcset);
                              t->l_affinity = kcset;
                              lwp_migrate(t, ci);
                      } else {
                              /* Old affinity mask is released, just clear. */
                              t->l_affinity = NULL;
                              lwp_unlock(t);
                      }
                      lcnt++;
              }
              mutex_exit(p->p_lock);
              if (lcnt == 0) {
                      error = ESRCH;
              }
      out:
              mutex_exit(&cpu_lock);
      
              /*
               * Drop the initial reference (LWPs, if any, have the ownership now),
               * and destroy whatever is in the G/C list, if filled.
               */
              if (kcset) {
                      kcpuset_unuse(kcset, &kcpulst);
              }
              if (kcpulst) {
                      kcpuset_destroy(kcpulst);
              }
              return error;
      }
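
/*
 * Illustrative userland sketch (added for clarity, not part of this
 * file), assuming the cpuset(3) and affinity(3) wrappers in libc and
 * libpthread: bind the calling thread to CPU 0.  Error handling and
 * additional headers are elided.
 *
 *	#include <pthread.h>
 *	#include <sched.h>
 *
 *	cpuset_t *cset = cpuset_create();
 *	int error;
 *
 *	cpuset_zero(cset);
 *	cpuset_set(0, cset);
 *	error = pthread_setaffinity_np(pthread_self(),
 *	    cpuset_size(cset), cset);	(0 on success, errno value on error)
 *	cpuset_destroy(cset);
 */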
      
      /*
       * Get affinity.
       */
      int
      sys__sched_getaffinity(struct lwp *l,
          const struct sys__sched_getaffinity_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(pid_t) pid;
                      syscallarg(lwpid_t) lid;
                      syscallarg(size_t) size;
                      syscallarg(cpuset_t *) cpuset;
              } */
              struct lwp *t;
              kcpuset_t *kcset;
              int error;
      
              error = genkcpuset(&kcset, SCARG(uap, cpuset), SCARG(uap, size));
              if (error)
                      return error;
      
              /* Locks the LWP */
              t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
              if (t == NULL) {
                      error = ESRCH;
                      goto out;
              }
              /* Check the permission */
              if (kauth_authorize_process(l->l_cred,
                  KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
                      mutex_exit(t->l_proc->p_lock);
                      error = EPERM;
                      goto out;
              }
              lwp_lock(t);
              if (t->l_affinity) {
                      kcpuset_copy(kcset, t->l_affinity);
              } else {
                      kcpuset_zero(kcset);
              }
              lwp_unlock(t);
              mutex_exit(t->l_proc->p_lock);
      
              error = kcpuset_copyout(kcset, SCARG(uap, cpuset), SCARG(uap, size));
      out:
              kcpuset_unuse(kcset, NULL);
              return error;
      }
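
/*
 * Illustrative userland sketch (added for clarity, not part of this
 * file): the query side of the same interface, again via affinity(3).
 *
 *	cpuset_t *cset = cpuset_create();
 *
 *	if (pthread_getaffinity_np(pthread_self(), cpuset_size(cset),
 *	    cset) == 0 && cpuset_isset(0, cset) > 0)
 *		printf("bound to CPU 0\n");
 *	cpuset_destroy(cset);
 */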
      
      /*
       * Priority protection for PTHREAD_PRIO_PROTECT. This is a weak
 * analogue of priority inheritance: temporarily raise the priority
 * of the caller while it accesses a protected resource.
       */
int
sys__sched_protect(struct lwp *l,
          const struct sys__sched_protect_args *uap, register_t *retval)
      {
              /* {
                      syscallarg(int) priority;
                      syscallarg(int *) opriority;
              } */
              int error;
              pri_t pri;
      
              KASSERT(l->l_inheritedprio == -1);
              KASSERT(l->l_auxprio == -1 || l->l_auxprio == l->l_protectprio);
              
              pri = SCARG(uap, priority);
              error = 0;
              lwp_lock(l);
              if (pri == -1) {
                      /* back out priority changes */
                      switch(l->l_protectdepth) {
                      case 0:
                              error = EINVAL;
                              break;
                      case 1:
                              l->l_protectdepth = 0;
                              l->l_protectprio = -1;
                              l->l_auxprio = -1;
                              break;
                      default:
                              l->l_protectdepth--;
                              break;
                      }
              } else if (pri < 0) {
                      /* Just retrieve the current value, for debugging */
                      if (l->l_protectprio == -1)
                              error = ENOENT;
                      else
                              *retval = l->l_protectprio - PRI_USER_RT;
              } else if (__predict_false(pri < SCHED_PRI_MIN ||
                  pri > SCHED_PRI_MAX || l->l_priority > pri + PRI_USER_RT)) {
                      /* must fail if existing priority is higher */
                      error = EPERM;
              } else {
                      /* play along but make no changes if not a realtime LWP. */
                      l->l_protectdepth++;
                      pri += PRI_USER_RT;
                      if (__predict_true(l->l_class != SCHED_OTHER && 
                          pri > l->l_protectprio)) {
                              l->l_protectprio = pri;
                              l->l_auxprio = pri;
                      }
              }
              lwp_unlock(l);
      
              return error;
      }
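
/*
 * Illustrative userland sketch (added for clarity, not part of this
 * file): a mutex using the priority-protect protocol that this syscall
 * backs is configured with the standard pthread attributes:
 *
 *	pthread_mutexattr_t attr;
 *	pthread_mutex_t mtx;
 *
 *	pthread_mutexattr_init(&attr);
 *	pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
 *	pthread_mutexattr_setprioceiling(&attr,
 *	    sched_get_priority_max(SCHED_FIFO));
 *	pthread_mutex_init(&mtx, &attr);
 *
 * While such a mutex is held, the owning thread runs at no less than the
 * ceiling priority; the boost is undone when the mutex is released.
 */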
      
      /*
       * Yield.
       */
      int
      sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
      {
      
              yield();
              return 0;
      }
      
      /*
       * Sysctl nodes and initialization.
       */
      static void
      sysctl_sched_setup(struct sysctllog **clog)
      {
              const struct sysctlnode *node = NULL;
      
              sysctl_createv(clog, 0, NULL, NULL,
                      CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
                      CTLTYPE_INT, "posix_sched",
                      SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
                                   "Process Scheduling option to which the "
                                   "system attempts to conform"),
                      NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
                      CTL_KERN, CTL_CREATE, CTL_EOL);
              sysctl_createv(clog, 0, NULL, &node,
                      CTLFLAG_PERMANENT,
                      CTLTYPE_NODE, "sched",
                      SYSCTL_DESCR("Scheduler options"),
                      NULL, 0, NULL, 0,
                      CTL_KERN, CTL_CREATE, CTL_EOL);
      
              if (node == NULL)
                      return;
      
              sysctl_createv(clog, 0, &node, NULL,
                      CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
                      CTLTYPE_INT, "pri_min",
                      SYSCTL_DESCR("Minimal POSIX real-time priority"),
                      NULL, SCHED_PRI_MIN, NULL, 0,
                      CTL_CREATE, CTL_EOL);
              sysctl_createv(clog, 0, &node, NULL,
                      CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
                      CTLTYPE_INT, "pri_max",
                      SYSCTL_DESCR("Maximal POSIX real-time priority"),
                      NULL, SCHED_PRI_MAX, NULL, 0,
                      CTL_CREATE, CTL_EOL);
      }
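
/*
 * Illustrative userland sketch (added for clarity, not part of this
 * file): the nodes created above can be read with sysctlbyname(3).
 *
 *	#include <sys/sysctl.h>
 *
 *	int prmax;
 *	size_t len = sizeof(prmax);
 *
 *	if (sysctlbyname("kern.sched.pri_max", &prmax, &len, NULL, 0) == 0)
 *		printf("maximal POSIX real-time priority: %d\n", prmax);
 */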
      
      static int
      sched_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
          void *arg0, void *arg1, void *arg2, void *arg3)
      {
              struct proc *p;
              int result;
      
              result = KAUTH_RESULT_DEFER;
              p = arg0;
      
	switch (action) {
              case KAUTH_PROCESS_SCHEDULER_GETPARAM:
                      if (kauth_cred_uidmatch(cred, p->p_cred))
                              result = KAUTH_RESULT_ALLOW;
                      break;
      
              case KAUTH_PROCESS_SCHEDULER_SETPARAM:
                      if (kauth_cred_uidmatch(cred, p->p_cred)) {
                              struct lwp *l;
                              int policy;
                              pri_t priority;
      
                              l = arg1;
                              policy = (int)(unsigned long)arg2;
                              priority = (pri_t)(unsigned long)arg3;
      
                              if ((policy == l->l_class ||
                                  (policy != SCHED_FIFO && policy != SCHED_RR)) &&
                                  priority <= l->l_priority)
                                      result = KAUTH_RESULT_ALLOW;
                      }
      
                      break;
      
              case KAUTH_PROCESS_SCHEDULER_GETAFFINITY:
                      result = KAUTH_RESULT_ALLOW;
                      break;
      
              case KAUTH_PROCESS_SCHEDULER_SETAFFINITY:
                      /* Privileged; we let the secmodel handle this. */
                      break;
      
              default:
                      break;
              }
      
	return result;
      }
      
      void
      sched_init(void)
      {
      
              sysctl_sched_setup(&sched_sysctl_log);
      
              sched_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
                  sched_listener_cb, NULL);
      }
      /*        $NetBSD: kern_mutex_obj.c,v 1.6 2018/02/05 04:25:04 ozaki-r Exp $        */
      
      /*-
       * Copyright (c) 2008 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_mutex_obj.c,v 1.6 2018/02/05 04:25:04 ozaki-r Exp $");
      
      #include <sys/param.h>
      #include <sys/atomic.h>
      #include <sys/mutex.h>
      #include <sys/pool.h>
      
      /* Mutex cache */
      #define        MUTEX_OBJ_MAGIC        0x5aa3c85d
      struct kmutexobj {
              kmutex_t        mo_lock;
              u_int                mo_magic;
              u_int                mo_refcnt;
      };
      
      static int        mutex_obj_ctor(void *, void *, int);
      
      static pool_cache_t        mutex_obj_cache                __read_mostly;
      
      /*
       * mutex_obj_init:
       *
       *        Initialize the mutex object store.
       */
      void
      mutex_obj_init(void)
      {
      
              mutex_obj_cache = pool_cache_init(sizeof(struct kmutexobj),
                  coherency_unit, 0, 0, "mutex", NULL, IPL_NONE, mutex_obj_ctor,
                  NULL, NULL);
      }
      
      /*
       * mutex_obj_ctor:
       *
       *        Initialize a new lock for the cache.
       */
      static int
      mutex_obj_ctor(void *arg, void *obj, int flags)
      {
              struct kmutexobj * mo = obj;
      
              mo->mo_magic = MUTEX_OBJ_MAGIC;
      
              return 0;
      }
      
      /*
       * mutex_obj_alloc:
       *
       *        Allocate a single lock object.
       */
      kmutex_t *
      mutex_obj_alloc(kmutex_type_t type, int ipl)
      {
              struct kmutexobj *mo;
              extern void _mutex_init(kmutex_t *, kmutex_type_t, int, uintptr_t);
      
	mo = pool_cache_get(mutex_obj_cache, PR_WAITOK);
              _mutex_init(&mo->mo_lock, type, ipl,
                  (uintptr_t)__builtin_return_address(0));
              mo->mo_refcnt = 1;
      
              return (kmutex_t *)mo;
      }
      
      /*
       * mutex_obj_hold:
       *
       *        Add a single reference to a lock object.  A reference to the object
       *        must already be held, and must be held across this call.
       */
      void
      mutex_obj_hold(kmutex_t *lock)
      {
              struct kmutexobj *mo = (struct kmutexobj *)lock;
      
              KASSERTMSG(mo->mo_magic == MUTEX_OBJ_MAGIC,
                  "%s: lock %p: mo->mo_magic (%#x) != MUTEX_OBJ_MAGIC (%#x)",
                   __func__, mo, mo->mo_magic, MUTEX_OBJ_MAGIC);
              KASSERTMSG(mo->mo_refcnt > 0,
                  "%s: lock %p: mo->mo_refcnt (%#x) == 0",
                   __func__, mo, mo->mo_refcnt);
      
              atomic_inc_uint(&mo->mo_refcnt);
      }
      
      /*
       * mutex_obj_free:
       *
       *        Drop a reference from a lock object.  If the last reference is being
       *        dropped, free the object and return true.  Otherwise, return false.
       */
      bool
      mutex_obj_free(kmutex_t *lock)
      {
              struct kmutexobj *mo = (struct kmutexobj *)lock;
      
              KASSERTMSG(mo->mo_magic == MUTEX_OBJ_MAGIC,
                  "%s: lock %p: mo->mo_magic (%#x) != MUTEX_OBJ_MAGIC (%#x)",
                   __func__, mo, mo->mo_magic, MUTEX_OBJ_MAGIC);
              KASSERTMSG(mo->mo_refcnt > 0,
                  "%s: lock %p: mo->mo_refcnt (%#x) == 0",
                   __func__, mo, mo->mo_refcnt);
      
              if (atomic_dec_uint_nv(&mo->mo_refcnt) > 0) {
                      return false;
              }
              mutex_destroy(&mo->mo_lock);
              pool_cache_put(mutex_obj_cache, mo);
              return true;
      }
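
/*
 * Illustrative sketch (added for clarity, not part of the original
 * source) of the reference protocol above: the allocator holds the
 * first reference, each additional holder takes one, and the final
 * mutex_obj_free() destroys the mutex and returns it to the cache.
 *
 *	kmutex_t *lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 *
 *	mutex_obj_hold(lock);			(refcnt 1 -> 2)
 *	...
 *	(void)mutex_obj_free(lock);		(refcnt 2 -> 1, returns false)
 *	(void)mutex_obj_free(lock);		(refcnt 1 -> 0, object freed)
 */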
      /*        $NetBSD: kern_condvar.c,v 1.41 2018/01/30 07:52:22 ozaki-r Exp $        */
      
      /*-
       * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*
       * Kernel condition variable implementation.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.41 2018/01/30 07:52:22 ozaki-r Exp $");
      
      #include <sys/param.h>
      #include <sys/systm.h>
      #include <sys/lwp.h>
      #include <sys/condvar.h>
      #include <sys/sleepq.h>
      #include <sys/lockdebug.h>
      #include <sys/cpu.h>
      #include <sys/kernel.h>
      
      /*
       * Accessors for the private contents of the kcondvar_t data type.
       *
       *        cv_opaque[0]        sleepq...
       *        cv_opaque[1]        ...pointers
       *        cv_opaque[2]        description for ps(1)
       *
       * cv_opaque[0..1] is protected by the interlock passed to cv_wait() (enqueue
 * only), and the sleep queue lock acquired with sleepq_hashlock() (enqueue
       * and dequeue).
       *
       * cv_opaque[2] (the wmesg) is static and does not change throughout the life
       * of the CV.
       */
      #define        CV_SLEEPQ(cv)                ((sleepq_t *)(cv)->cv_opaque)
      #define        CV_WMESG(cv)                ((const char *)(cv)->cv_opaque[2])
      #define        CV_SET_WMESG(cv, v)         (cv)->cv_opaque[2] = __UNCONST(v)
      
      #define        CV_DEBUG_P(cv)        (CV_WMESG(cv) != nodebug)
      #define        CV_RA                ((uintptr_t)__builtin_return_address(0))
      
      static void                cv_unsleep(lwp_t *, bool);
      static inline void        cv_wakeup_one(kcondvar_t *);
      static inline void        cv_wakeup_all(kcondvar_t *);
      
      static syncobj_t cv_syncobj = {
              .sobj_flag        = SOBJ_SLEEPQ_SORTED,
              .sobj_unsleep        = cv_unsleep,
              .sobj_changepri        = sleepq_changepri,
              .sobj_lendpri        = sleepq_lendpri,
              .sobj_owner        = syncobj_noowner,
      };
      
      lockops_t cv_lockops = {
              .lo_name = "Condition variable",
              .lo_type = LOCKOPS_CV,
              .lo_dump = NULL,
      };
      
      static const char deadcv[] = "deadcv";
      #ifdef LOCKDEBUG
      static const char nodebug[] = "nodebug";
      
      #define CV_LOCKDEBUG_HANDOFF(l, cv) cv_lockdebug_handoff(l, cv)
      #define CV_LOCKDEBUG_PROCESS(l, cv) cv_lockdebug_process(l, cv)
      
      static inline void
      cv_lockdebug_handoff(lwp_t *l, kcondvar_t *cv)
      {
      
              if (CV_DEBUG_P(cv))
                      l->l_flag |= LW_CVLOCKDEBUG;
      }
      
      static inline void
      cv_lockdebug_process(lwp_t *l, kcondvar_t *cv)
      {
      
              if ((l->l_flag & LW_CVLOCKDEBUG) == 0)
                      return;
      
              l->l_flag &= ~LW_CVLOCKDEBUG;
              LOCKDEBUG_UNLOCKED(true, cv, CV_RA, 0);
      }
      #else
      #define CV_LOCKDEBUG_HANDOFF(l, cv) __nothing
      #define CV_LOCKDEBUG_PROCESS(l, cv) __nothing
      #endif
      
      /*
       * cv_init:
       *
       *        Initialize a condition variable for use.
       */
      void
      cv_init(kcondvar_t *cv, const char *wmesg)
      {
      #ifdef LOCKDEBUG
              bool dodebug;
      
              dodebug = LOCKDEBUG_ALLOC(cv, &cv_lockops,
                  (uintptr_t)__builtin_return_address(0));
              if (!dodebug) {
                      /* XXX This will break vfs_lockf. */
                      wmesg = nodebug;
              }
      #endif
              KASSERT(wmesg != NULL);
              CV_SET_WMESG(cv, wmesg);
              sleepq_init(CV_SLEEPQ(cv));
      }
      
      /*
       * cv_destroy:
       *
       *        Tear down a condition variable.
       */
      void
      cv_destroy(kcondvar_t *cv)
      {
      
              LOCKDEBUG_FREE(CV_DEBUG_P(cv), cv);
      #ifdef DIAGNOSTIC
              KASSERT(cv_is_valid(cv));
              CV_SET_WMESG(cv, deadcv);
      #endif
      }
      
      /*
       * cv_enter:
       *
       *        Look up and lock the sleep queue corresponding to the given
       *        condition variable, and increment the number of waiters.
       */
      static inline void
      cv_enter(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l)
      {
              sleepq_t *sq;
              kmutex_t *mp;
      
              KASSERT(cv_is_valid(cv));
              KASSERT(!cpu_intr_p());
              KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL);
      
              LOCKDEBUG_LOCKED(CV_DEBUG_P(cv), cv, mtx, CV_RA, 0);
      
              l->l_kpriority = true;
              mp = sleepq_hashlock(cv);
              sq = CV_SLEEPQ(cv);
              sleepq_enter(sq, l, mp);
              sleepq_enqueue(sq, cv, CV_WMESG(cv), &cv_syncobj);
              mutex_exit(mtx);
              KASSERT(cv_has_waiters(cv));
      }
      
      /*
       * cv_exit:
       *
       *        After resuming execution, check to see if we have been restarted
       *        as a result of cv_signal().  If we have, but cannot take the
 *	wakeup (because of, e.g., a pending Unix signal or timeout) then try
       *        to ensure that another LWP sees it.  This is necessary because
       *        there may be multiple waiters, and at least one should take the
       *        wakeup if possible.
       */
      static inline int
      cv_exit(kcondvar_t *cv, kmutex_t *mtx, lwp_t *l, const int error)
      {
      
              mutex_enter(mtx);
              if (__predict_false(error != 0))
                      cv_signal(cv);
      
              LOCKDEBUG_UNLOCKED(CV_DEBUG_P(cv), cv, CV_RA, 0);
              KASSERT(cv_is_valid(cv));
      
              return error;
      }
      
      /*
       * cv_unsleep:
       *
       *        Remove an LWP from the condition variable and sleep queue.  This
       *        is called when the LWP has not been awoken normally but instead
       *        interrupted: for example, when a signal is received.  Must be
       *        called with the LWP locked, and must return it unlocked.
       */
      static void
      cv_unsleep(lwp_t *l, bool cleanup)
      {
              kcondvar_t *cv __diagused;
      
              cv = (kcondvar_t *)(uintptr_t)l->l_wchan;
      
              KASSERT(l->l_wchan == (wchan_t)cv);
              KASSERT(l->l_sleepq == CV_SLEEPQ(cv));
              KASSERT(cv_is_valid(cv));
              KASSERT(cv_has_waiters(cv));
      
              sleepq_unsleep(l, cleanup);
      }
      
      /*
       * cv_wait:
       *
 *	Wait non-interruptibly on a condition variable until awoken.
       */
      void
      cv_wait(kcondvar_t *cv, kmutex_t *mtx)
      {
              lwp_t *l = curlwp;
      
              KASSERT(mutex_owned(mtx));
      
              cv_enter(cv, mtx, l);
      
              /*
               * We can't use cv_exit() here since the cv might be destroyed before
               * this thread gets a chance to run.  Instead, hand off the lockdebug
               * responsibility to the thread that wakes us up.
               */
      
              CV_LOCKDEBUG_HANDOFF(l, cv);
              (void)sleepq_block(0, false);
              mutex_enter(mtx);
      }
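
/*
 * Illustrative sketch (added for clarity, not part of the original
 * source) of the canonical wait pattern, assuming a hypothetical "sc"
 * structure with sc_lock (kmutex_t), sc_cv (kcondvar_t) and sc_ready
 * (bool) members; the predicate is re-tested in a loop because a
 * wakeup may be spurious:
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (!sc->sc_ready)
 *		cv_wait(&sc->sc_cv, &sc->sc_lock);
 *	... use the now-ready resource ...
 *	mutex_exit(&sc->sc_lock);
 */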
      
      /*
       * cv_wait_sig:
       *
 *	Wait on a condition variable until awoken or a signal is received.
       *        Will also return early if the process is exiting.  Returns zero if
       *        awoken normally, ERESTART if a signal was received and the system
       *        call is restartable, or EINTR otherwise.
       */
      int
      cv_wait_sig(kcondvar_t *cv, kmutex_t *mtx)
      {
              lwp_t *l = curlwp;
              int error;
      
              KASSERT(mutex_owned(mtx));
      
              cv_enter(cv, mtx, l);
              error = sleepq_block(0, true);
              return cv_exit(cv, mtx, l, error);
      }
      
      /*
       * cv_timedwait:
       *
       *        Wait on a condition variable until awoken or the specified timeout
       *        expires.  Returns zero if awoken normally or EWOULDBLOCK if the
       *        timeout expired.
       *
       *        timo is a timeout in ticks.  timo = 0 specifies an infinite timeout.
       */
      int
      cv_timedwait(kcondvar_t *cv, kmutex_t *mtx, int timo)
      {
              lwp_t *l = curlwp;
              int error;
      
              KASSERT(mutex_owned(mtx));
      
              cv_enter(cv, mtx, l);
              error = sleepq_block(timo, false);
              return cv_exit(cv, mtx, l, error);
      }
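
/*
 * Illustrative sketch (added for clarity, not part of the original
 * source), reusing the hypothetical "sc" structure from the cv_wait()
 * example: wait for the condition, bounding each sleep by a one-second
 * timeout converted to ticks with mstohz(9), and give up once it expires.
 *
 *	int error = 0;
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (!sc->sc_ready) {
 *		error = cv_timedwait(&sc->sc_cv, &sc->sc_lock, mstohz(1000));
 *		if (error == EWOULDBLOCK)
 *			break;
 *	}
 *	mutex_exit(&sc->sc_lock);
 */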
      
      /*
       * cv_timedwait_sig:
       *
 *	Wait on a condition variable until awoken, a signal is received, or
 *	the specified timeout expires.  Will also return early if the process is
       *        exiting.  Returns zero if awoken normally, EWOULDBLOCK if the
       *        timeout expires, ERESTART if a signal was received and the system
       *        call is restartable, or EINTR otherwise.
       *
       *        timo is a timeout in ticks.  timo = 0 specifies an infinite timeout.
       */
      int
      cv_timedwait_sig(kcondvar_t *cv, kmutex_t *mtx, int timo)
      {
              lwp_t *l = curlwp;
              int error;
      
              KASSERT(mutex_owned(mtx));
      
              cv_enter(cv, mtx, l);
              error = sleepq_block(timo, true);
              return cv_exit(cv, mtx, l, error);
      }
      
      /*
       * Given a number of seconds, sec, and 2^64ths of a second, frac, we
       * want a number of ticks for a timeout:
       *
       *        timo = hz*(sec + frac/2^64)
       *             = hz*sec + hz*frac/2^64
       *             = hz*sec + hz*(frachi*2^32 + fraclo)/2^64
       *             = hz*sec + hz*frachi/2^32 + hz*fraclo/2^64,
       *
       * where frachi is the high 32 bits of frac and fraclo is the
       * low 32 bits.
       *
       * We assume hz < INT_MAX/2 < UINT32_MAX, so
       *
       *        hz*fraclo/2^64 < fraclo*2^32/2^64 <= 1,
       *
       * since fraclo < 2^32.
       *
       * We clamp the result at INT_MAX/2 for a timeout in ticks, since we
       * can't represent timeouts higher than INT_MAX in cv_timedwait, and
       * spurious wakeup is OK.  Moreover, we don't want to wrap around,
       * because we compute end - start in ticks in order to compute the
       * remaining timeout, and that difference cannot wrap around, so we use
       * a timeout less than INT_MAX.  Using INT_MAX/2 provides plenty of
       * margin for paranoia and will exceed most waits in practice by far.
       */
      static unsigned
      bintime2timo(const struct bintime *bt)
      {
      
              KASSERT(hz < INT_MAX/2);
              CTASSERT(INT_MAX/2 < UINT32_MAX);
              if (bt->sec > ((INT_MAX/2)/hz))
                      return INT_MAX/2;
              if ((hz*(bt->frac >> 32) >> 32) > (INT_MAX/2 - hz*bt->sec))
                      return INT_MAX/2;
      
              return hz*bt->sec + (hz*(bt->frac >> 32) >> 32);
      }
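
/*
 * Worked example (added for illustration): with hz = 100 and a timeout
 * of 1.5 seconds, bt->sec = 1 and bt->frac = 2^63, so
 *
 *	hz*sec           = 100
 *	hz*frachi/2^32   = 100*2^31/2^32 = 50,
 *
 * giving timo = 150 ticks, i.e. 1.5 seconds at hz = 100 as expected.
 */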
      
      /*
       * timo is in units of ticks.  We want units of seconds and 2^64ths of
       * a second.  We know hz = 1 sec/tick, and 2^64 = 1 sec/(2^64th of a
       * second), from which we can conclude 2^64 / hz = 1 (2^64th of a
       * second)/tick.  So for the fractional part, we compute
       *
       *        frac = rem * 2^64 / hz
       *             = ((rem * 2^32) / hz) * 2^32
       *
       * Using truncating integer division instead of real division will
       * leave us with only about 32 bits of precision, which means about
       * 1/4-nanosecond resolution, which is good enough for our purposes.
       */
      static struct bintime
      timo2bintime(unsigned timo)
      {
      
              return (struct bintime) {
                      .sec = timo / hz,
                      .frac = (((uint64_t)(timo % hz) << 32)/hz << 32),
              };
      }
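
/*
 * Worked example (added for illustration): with hz = 100 and
 * timo = 150 ticks,
 *
 *	sec  = 150 / 100 = 1
 *	frac = ((150 % 100) * 2^32 / 100) * 2^32 = 2^31 * 2^32 = 2^63,
 *
 * i.e. 1.5 seconds, the inverse of the bintime2timo() example above.
 */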
      
      /*
       * cv_timedwaitbt:
       *
       *        Wait on a condition variable until awoken or the specified
       *        timeout expires.  Returns zero if awoken normally or
       *        EWOULDBLOCK if the timeout expires.
       *
       *        On entry, bt is a timeout in bintime.  cv_timedwaitbt subtracts
       *        the time slept, so on exit, bt is the time remaining after
       *        sleeping, possibly negative if the complete time has elapsed.
 *	No infinite timeout; use cv_wait instead.
       *
       *        epsilon is a requested maximum error in timeout (excluding
       *        spurious wakeups).  Currently not used, will be used in the
       *        future to choose between low- and high-resolution timers.
       *        Actual wakeup time will be somewhere in [t, t + max(e, r) + s)
       *        where r is the finest resolution of clock available and s is
       *        scheduling delays for scheduler overhead and competing threads.
       *        Time is measured by the interrupt source implementing the
       *        timeout, not by another timecounter.
       */
      int
      cv_timedwaitbt(kcondvar_t *cv, kmutex_t *mtx, struct bintime *bt,
          const struct bintime *epsilon __diagused)
      {
              struct bintime slept;
              unsigned start, end;
              int error;
      
              KASSERTMSG(bt->sec >= 0, "negative timeout");
              KASSERTMSG(epsilon != NULL, "specify maximum requested delay");
      
              /*
               * hardclock_ticks is technically int, but nothing special
	 * happens on overflow, so we assume two's-complement
               * wraparound and just treat it as unsigned.
               */
              start = hardclock_ticks;
              error = cv_timedwait(cv, mtx, bintime2timo(bt));
              end = hardclock_ticks;
      
              slept = timo2bintime(end - start);
              /* bt := bt - slept */
              bintime_sub(bt, &slept);
      
              return error;
      }
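
/*
 * Illustrative sketch (added for clarity, not part of the original
 * source), reusing the hypothetical "sc" structure from the cv_wait()
 * example: because bt is decremented by the time slept, the remaining
 * budget carries across spurious wakeups.
 *
 *	struct bintime bt = { .sec = 1, .frac = 0 };	(one second total)
 *	const struct bintime eps = { .sec = 0, .frac = 0 };
 *	int error = 0;
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (!sc->sc_ready) {
 *		error = cv_timedwaitbt(&sc->sc_cv, &sc->sc_lock, &bt, &eps);
 *		if (error)
 *			break;
 *	}
 *	mutex_exit(&sc->sc_lock);
 */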
      
      /*
       * cv_timedwaitbt_sig:
       *
       *        Wait on a condition variable until awoken, the specified
       *        timeout expires, or interrupted by a signal.  Returns zero if
       *        awoken normally, EWOULDBLOCK if the timeout expires, or
       *        EINTR/ERESTART if interrupted by a signal.
       *
       *        On entry, bt is a timeout in bintime.  cv_timedwaitbt_sig
       *        subtracts the time slept, so on exit, bt is the time remaining
 *	after sleeping.  No infinite timeout; use cv_wait_sig instead.
       *
       *        epsilon is a requested maximum error in timeout (excluding
       *        spurious wakeups).  Currently not used, will be used in the
       *        future to choose between low- and high-resolution timers.
       */
      int
      cv_timedwaitbt_sig(kcondvar_t *cv, kmutex_t *mtx, struct bintime *bt,
          const struct bintime *epsilon __diagused)
      {
              struct bintime slept;
              unsigned start, end;
              int error;
      
              KASSERTMSG(bt->sec >= 0, "negative timeout");
              KASSERTMSG(epsilon != NULL, "specify maximum requested delay");
      
              /*
               * hardclock_ticks is technically int, but nothing special
	 * happens on overflow, so we assume two's-complement
               * wraparound and just treat it as unsigned.
               */
              start = hardclock_ticks;
              error = cv_timedwait_sig(cv, mtx, bintime2timo(bt));
              end = hardclock_ticks;
      
              slept = timo2bintime(end - start);
              /* bt := bt - slept */
              bintime_sub(bt, &slept);
      
              return error;
      }
      
      /*
       * cv_signal:
       *
       *        Wake the highest priority LWP waiting on a condition variable.
       *        Must be called with the interlocking mutex held.
       */
      void
      cv_signal(kcondvar_t *cv)
      {
      
              /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
              KASSERT(cv_is_valid(cv));
      
              if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv))))
                      cv_wakeup_one(cv);
      }
      
      static inline void
      cv_wakeup_one(kcondvar_t *cv)
      {
              sleepq_t *sq;
              kmutex_t *mp;
              lwp_t *l;
      
              KASSERT(cv_is_valid(cv));
      
              mp = sleepq_hashlock(cv);
              sq = CV_SLEEPQ(cv);
              l = TAILQ_FIRST(sq);
              if (l == NULL) {
                      mutex_spin_exit(mp);
                      return;
              }
              KASSERT(l->l_sleepq == sq);
              KASSERT(l->l_mutex == mp);
              KASSERT(l->l_wchan == cv);
              CV_LOCKDEBUG_PROCESS(l, cv);
              sleepq_remove(sq, l);
              mutex_spin_exit(mp);
      
              KASSERT(cv_is_valid(cv));
      }
      
      /*
       * cv_broadcast:
       *
       *        Wake all LWPs waiting on a condition variable.  Must be called
       *        with the interlocking mutex held.
       */
      void
      cv_broadcast(kcondvar_t *cv)
      {
      
              /* LOCKDEBUG_WAKEUP(CV_DEBUG_P(cv), cv, CV_RA); */
	KASSERT(cv_is_valid(cv));
      
	if (__predict_false(!TAILQ_EMPTY(CV_SLEEPQ(cv))))
                      cv_wakeup_all(cv);
      }
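
/*
 * Illustrative sketch (added for clarity, not part of the original
 * source): the producer side of the cv_wait() pattern shown earlier;
 * the interlock is held across both the state change and the wakeup.
 *
 *	mutex_enter(&sc->sc_lock);
 *	sc->sc_ready = true;
 *	cv_broadcast(&sc->sc_cv);
 *	mutex_exit(&sc->sc_lock);
 */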
      
      static inline void
      cv_wakeup_all(kcondvar_t *cv)
      {
              sleepq_t *sq;
              kmutex_t *mp;
              lwp_t *l, *next;
      
              KASSERT(cv_is_valid(cv));
      
              mp = sleepq_hashlock(cv);
              sq = CV_SLEEPQ(cv);
              for (l = TAILQ_FIRST(sq); l != NULL; l = next) {
                      KASSERT(l->l_sleepq == sq);
                      KASSERT(l->l_mutex == mp);
                      KASSERT(l->l_wchan == cv);
                      next = TAILQ_NEXT(l, l_sleepchain);
                      CV_LOCKDEBUG_PROCESS(l, cv);
                      sleepq_remove(sq, l);
              }
              mutex_spin_exit(mp);
      
              KASSERT(cv_is_valid(cv));
      }
      
      /*
       * cv_has_waiters:
       *
       *        For diagnostic assertions: return non-zero if a condition
       *        variable has waiters.
       */
      bool
      cv_has_waiters(kcondvar_t *cv)
      {
      
              return !TAILQ_EMPTY(CV_SLEEPQ(cv));
      }
      
      /*
       * cv_is_valid:
       *
       *        For diagnostic assertions: return non-zero if a condition
       *        variable appears to be valid.  No locks need be held.
       */
      bool
      cv_is_valid(kcondvar_t *cv)
      {
      
	return CV_WMESG(cv) != deadcv && CV_WMESG(cv) != NULL;
      }
      /*        $NetBSD: kern_rwlock.c,v 1.54 2019/05/09 05:00:31 ozaki-r Exp $        */
      
      /*-
       * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
       * All rights reserved.
       *
       * This code is derived from software contributed to The NetBSD Foundation
       * by Jason R. Thorpe and Andrew Doran.
       *
       * Redistribution and use in source and binary forms, with or without
       * modification, are permitted provided that the following conditions
       * are met:
       * 1. Redistributions of source code must retain the above copyright
       *    notice, this list of conditions and the following disclaimer.
       * 2. Redistributions in binary form must reproduce the above copyright
       *    notice, this list of conditions and the following disclaimer in the
       *    documentation and/or other materials provided with the distribution.
       *
       * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       * POSSIBILITY OF SUCH DAMAGE.
       */
      
      /*
       * Kernel reader/writer lock implementation, modeled after those
       * found in Solaris, a description of which can be found in:
       *
       *        Solaris Internals: Core Kernel Architecture, Jim Mauro and
       *            Richard McDougall.
       */
      
      #include <sys/cdefs.h>
      __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.54 2019/05/09 05:00:31 ozaki-r Exp $");
      
      #define        __RWLOCK_PRIVATE
      
      #include <sys/param.h>
      #include <sys/proc.h>
      #include <sys/rwlock.h>
      #include <sys/sched.h>
      #include <sys/sleepq.h>
      #include <sys/systm.h>
      #include <sys/lockdebug.h>
      #include <sys/cpu.h>
      #include <sys/atomic.h>
      #include <sys/lock.h>
      #include <sys/pserialize.h>
      
      #include <dev/lockstat.h>
      
      /*
       * LOCKDEBUG
       */
      
      #if defined(LOCKDEBUG)
      
      #define        RW_WANTLOCK(rw, op)                                                \
              LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),                        \
                  (uintptr_t)__builtin_return_address(0), op == RW_READER);
      #define        RW_LOCKED(rw, op)                                                \
              LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,                        \
                  (uintptr_t)__builtin_return_address(0), op == RW_READER);
      #define        RW_UNLOCKED(rw, op)                                                \